{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use PPO to Play Acrobot-v1\n",
    "\n",
    "PyTorch version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import scipy.signal as signal\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import torch.distributions as distributions\n",
    "torch.manual_seed(0)\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:56:35 [INFO] env: <AcrobotEnv<Acrobot-v1>>\n",
      "22:56:35 [INFO] action_space: Discrete(3)\n",
      "22:56:35 [INFO] observation_space: Box(-28.274333953857422, 28.274333953857422, (6,), float32)\n",
      "22:56:35 [INFO] reward_range: (-inf, inf)\n",
      "22:56:35 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 15}\n",
      "22:56:35 [INFO] _max_episode_steps: 500\n",
      "22:56:35 [INFO] _elapsed_steps: None\n",
      "22:56:35 [INFO] id: Acrobot-v1\n",
      "22:56:35 [INFO] entry_point: gym.envs.classic_control:AcrobotEnv\n",
      "22:56:35 [INFO] reward_threshold: -100.0\n",
      "22:56:35 [INFO] nondeterministic: False\n",
      "22:56:35 [INFO] max_episode_steps: 500\n",
      "22:56:35 [INFO] _kwargs: {}\n",
      "22:56:35 [INFO] _env_name: Acrobot\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('Acrobot-v1')\n",
    "env.seed(0)\n",
    "for key in vars(env):\n",
    "    logging.info('%s: %s', key, vars(env)[key])\n",
    "for key in vars(env.spec):\n",
    "    logging.info('%s: %s', key, vars(env.spec)[key])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class PPOReplayer:\n",
    "    def __init__(self):\n",
    "        self.fields = ['state', 'action', 'prob', 'advantage', 'return']\n",
    "        self.memory = pd.DataFrame(columns=self.fields)\n",
    "\n",
    "    def store(self, df):\n",
    "        self.memory = pd.concat([self.memory, df[self.fields]], ignore_index=True)\n",
    "\n",
    "    def sample(self, size):\n",
    "        indices = np.random.choice(self.memory.shape[0], size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.fields)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class PPOAgent:\n",
    "    def __init__(self, env):\n",
    "        self.gamma = 0.99\n",
    "\n",
    "        self.replayer = PPOReplayer()\n",
    "\n",
    "        self.actor_net = self.build_net(\n",
    "                input_size=env.observation_space.shape[0],\n",
    "                hidden_sizes=[100,],\n",
    "                output_size=env.action_space.n, output_activator=nn.Softmax(1))\n",
    "        self.actor_optimizer = optim.Adam(self.actor_net.parameters(), 0.001)\n",
    "        self.critic_net = self.build_net(\n",
    "                input_size=env.observation_space.shape[0],\n",
    "                hidden_sizes=[100,])\n",
    "        self.critic_optimizer = optim.Adam(self.critic_net.parameters(), 0.002)\n",
    "        self.critic_loss = nn.MSELoss()\n",
    "\n",
    "    def build_net(self, input_size, hidden_sizes, output_size=1,\n",
    "            output_activator=None):\n",
    "        layers = []\n",
    "        for input_size, output_size in zip(\n",
    "                [input_size,] + hidden_sizes, hidden_sizes + [output_size,]):\n",
    "            layers.append(nn.Linear(input_size, output_size))\n",
    "            layers.append(nn.ReLU())\n",
    "        layers = layers[:-1]\n",
    "        if output_activator:\n",
    "            layers.append(output_activator)\n",
    "        net = nn.Sequential(*layers)\n",
    "        return net\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        self.mode = mode\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        state_tensor = torch.as_tensor(observation, dtype=torch.float).unsqueeze(0)\n",
    "        prob_tensor = self.actor_net(state_tensor)\n",
    "        action_tensor = distributions.Categorical(prob_tensor).sample()\n",
    "        action = action_tensor.numpy()[0]\n",
    "        if self.mode == 'train':\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        if self.mode == 'train':\n",
    "            self.save_trajectory_to_replayer()\n",
    "            if len(self.replayer.memory) >= 1000:\n",
    "                for batch in range(5): # learn multiple times\n",
    "                    self.learn()\n",
    "                self.replayer = PPOReplayer() # reset replayer after the agent changes itself\n",
    "\n",
    "    def save_trajectory_to_replayer(self):\n",
    "        df = pd.DataFrame(\n",
    "                np.array(self.trajectory, dtype=object).reshape(-1, 4),\n",
    "                columns=['state', 'reward', 'done', 'action'])\n",
    "        state_tensor = torch.as_tensor(np.stack(df['state']), dtype=torch.float)\n",
    "        action_tensor = torch.as_tensor(df['action'], dtype=torch.long)\n",
    "        v_tensor = self.critic_net(state_tensor)\n",
    "        df['v'] = v_tensor.detach().numpy()\n",
    "        prob_tensor = self.actor_net(state_tensor)\n",
    "        pi_tensor = prob_tensor.gather(-1, action_tensor.unsqueeze(1)).squeeze(1)\n",
    "        df['prob'] = pi_tensor.detach().numpy()\n",
    "        df['next_v'] = df['v'].shift(-1).fillna(0.)\n",
    "        df['u'] = df['reward'] + self.gamma * df['next_v']\n",
    "        df['delta'] = df['u'] - df['v']\n",
    "        df['advantage'] = signal.lfilter([1.,], [1., -self.gamma],\n",
    "                df['delta'][::-1])[::-1]\n",
    "        df['return'] = signal.lfilter([1.,], [1., -self.gamma],\n",
    "                df['reward'][::-1])[::-1]\n",
    "        self.replayer.store(df)\n",
    "\n",
    "    def learn(self):\n",
    "        states, actions, old_pis, advantages, returns = \\\n",
    "                self.replayer.sample(size=64)\n",
    "        state_tensor = torch.as_tensor(states, dtype=torch.float)\n",
    "        action_tensor = torch.as_tensor(actions, dtype=torch.long)\n",
    "        old_pi_tensor = torch.as_tensor(old_pis, dtype=torch.float)\n",
    "        advantage_tensor = torch.as_tensor(advantages, dtype=torch.float)\n",
    "        return_tensor = torch.as_tensor(returns, dtype=torch.float).unsqueeze(1)\n",
    "\n",
    "        # train actor\n",
    "        all_pi_tensor = self.actor_net(state_tensor)\n",
    "        pi_tensor = all_pi_tensor.gather(1, action_tensor.unsqueeze(1)).squeeze(1)\n",
    "        surrogate_advantage_tensor = (pi_tensor / old_pi_tensor) * \\\n",
    "                advantage_tensor\n",
    "        clip_times_advantage_tensor = 0.1 * surrogate_advantage_tensor\n",
    "        max_surrogate_advantage_tensor = advantage_tensor + \\\n",
    "                torch.where(advantage_tensor > 0.,\n",
    "                clip_times_advantage_tensor, -clip_times_advantage_tensor)\n",
    "        clipped_surrogate_advantage_tensor = torch.min(\n",
    "                surrogate_advantage_tensor, max_surrogate_advantage_tensor)\n",
    "        actor_loss_tensor = -clipped_surrogate_advantage_tensor.mean()\n",
    "        self.actor_optimizer.zero_grad()\n",
    "        actor_loss_tensor.backward()\n",
    "        self.actor_optimizer.step()\n",
    "\n",
    "        # train critic\n",
    "        pred_tensor = self.critic_net(state_tensor)\n",
    "        critic_loss_tensor = self.critic_loss(pred_tensor, return_tensor)\n",
    "        self.critic_optimizer.zero_grad()\n",
    "        critic_loss_tensor.backward()\n",
    "        self.critic_optimizer.step()\n",
    "\n",
    "\n",
    "agent = PPOAgent(env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22:56:35 [INFO] ==== train ====\n",
      "22:56:36 [INFO] NumExpr defaulting to 8 threads.\n",
      "22:56:36 [DEBUG] train episode 0: reward = -500.00, steps = 500\n",
      "22:56:37 [DEBUG] train episode 1: reward = -500.00, steps = 500\n",
      "22:56:38 [DEBUG] train episode 2: reward = -500.00, steps = 500\n",
      "22:56:40 [DEBUG] train episode 3: reward = -500.00, steps = 500\n",
      "22:56:41 [DEBUG] train episode 4: reward = -500.00, steps = 500\n",
      "22:56:42 [DEBUG] train episode 5: reward = -500.00, steps = 500\n",
      "22:56:43 [DEBUG] train episode 6: reward = -500.00, steps = 500\n",
      "22:56:45 [DEBUG] train episode 7: reward = -500.00, steps = 500\n",
      "22:56:46 [DEBUG] train episode 8: reward = -500.00, steps = 500\n",
      "22:56:47 [DEBUG] train episode 9: reward = -500.00, steps = 500\n",
      "22:56:48 [DEBUG] train episode 10: reward = -500.00, steps = 500\n",
      "22:56:50 [DEBUG] train episode 11: reward = -500.00, steps = 500\n",
      "22:56:51 [DEBUG] train episode 12: reward = -500.00, steps = 500\n",
      "22:56:52 [DEBUG] train episode 13: reward = -500.00, steps = 500\n",
      "22:56:53 [DEBUG] train episode 14: reward = -500.00, steps = 500\n",
      "22:56:55 [DEBUG] train episode 15: reward = -500.00, steps = 500\n",
      "22:56:56 [DEBUG] train episode 16: reward = -500.00, steps = 500\n",
      "22:56:57 [DEBUG] train episode 17: reward = -500.00, steps = 500\n",
      "22:56:58 [DEBUG] train episode 18: reward = -500.00, steps = 500\n",
      "22:57:00 [DEBUG] train episode 19: reward = -500.00, steps = 500\n",
      "22:57:01 [DEBUG] train episode 20: reward = -500.00, steps = 500\n",
      "22:57:02 [DEBUG] train episode 21: reward = -500.00, steps = 500\n",
      "22:57:03 [DEBUG] train episode 22: reward = -500.00, steps = 500\n",
      "22:57:05 [DEBUG] train episode 23: reward = -500.00, steps = 500\n",
      "22:57:06 [DEBUG] train episode 24: reward = -500.00, steps = 500\n",
      "22:57:07 [DEBUG] train episode 25: reward = -500.00, steps = 500\n",
      "22:57:08 [DEBUG] train episode 26: reward = -395.00, steps = 396\n",
      "22:57:09 [DEBUG] train episode 27: reward = -500.00, steps = 500\n",
      "22:57:10 [DEBUG] train episode 28: reward = -500.00, steps = 500\n",
      "22:57:11 [DEBUG] train episode 29: reward = -500.00, steps = 500\n",
      "22:57:13 [DEBUG] train episode 30: reward = -500.00, steps = 500\n",
      "22:57:14 [DEBUG] train episode 31: reward = -500.00, steps = 500\n",
      "22:57:15 [DEBUG] train episode 32: reward = -500.00, steps = 500\n",
      "22:57:16 [DEBUG] train episode 33: reward = -500.00, steps = 500\n",
      "22:57:17 [DEBUG] train episode 34: reward = -500.00, steps = 500\n",
      "22:57:18 [DEBUG] train episode 35: reward = -500.00, steps = 500\n",
      "22:57:20 [DEBUG] train episode 36: reward = -500.00, steps = 500\n",
      "22:57:21 [DEBUG] train episode 37: reward = -500.00, steps = 500\n",
      "22:57:22 [DEBUG] train episode 38: reward = -500.00, steps = 500\n",
      "22:57:23 [DEBUG] train episode 39: reward = -500.00, steps = 500\n",
      "22:57:24 [DEBUG] train episode 40: reward = -500.00, steps = 500\n",
      "22:57:25 [DEBUG] train episode 41: reward = -329.00, steps = 330\n",
      "22:57:26 [DEBUG] train episode 42: reward = -500.00, steps = 500\n",
      "22:57:28 [DEBUG] train episode 43: reward = -500.00, steps = 500\n",
      "22:57:29 [DEBUG] train episode 44: reward = -500.00, steps = 500\n",
      "22:57:30 [DEBUG] train episode 45: reward = -500.00, steps = 500\n",
      "22:57:31 [DEBUG] train episode 46: reward = -500.00, steps = 500\n",
      "22:57:32 [DEBUG] train episode 47: reward = -500.00, steps = 500\n",
      "22:57:34 [DEBUG] train episode 48: reward = -500.00, steps = 500\n",
      "22:57:35 [DEBUG] train episode 49: reward = -500.00, steps = 500\n",
      "22:57:36 [DEBUG] train episode 50: reward = -467.00, steps = 468\n",
      "22:57:37 [DEBUG] train episode 51: reward = -500.00, steps = 500\n",
      "22:57:38 [DEBUG] train episode 52: reward = -491.00, steps = 492\n",
      "22:57:39 [DEBUG] train episode 53: reward = -500.00, steps = 500\n",
      "22:57:40 [DEBUG] train episode 54: reward = -500.00, steps = 500\n",
      "22:57:41 [DEBUG] train episode 55: reward = -351.00, steps = 352\n",
      "22:57:42 [DEBUG] train episode 56: reward = -500.00, steps = 500\n",
      "22:57:43 [DEBUG] train episode 57: reward = -438.00, steps = 439\n",
      "22:57:43 [DEBUG] train episode 58: reward = -303.00, steps = 304\n",
      "22:57:44 [DEBUG] train episode 59: reward = -373.00, steps = 374\n",
      "22:57:45 [DEBUG] train episode 60: reward = -328.00, steps = 329\n",
      "22:57:45 [DEBUG] train episode 61: reward = -288.00, steps = 289\n",
      "22:57:46 [DEBUG] train episode 62: reward = -332.00, steps = 333\n",
      "22:57:47 [DEBUG] train episode 63: reward = -457.00, steps = 458\n",
      "22:57:48 [DEBUG] train episode 64: reward = -500.00, steps = 500\n",
      "22:57:49 [DEBUG] train episode 65: reward = -368.00, steps = 369\n",
      "22:57:50 [DEBUG] train episode 66: reward = -500.00, steps = 500\n",
      "22:57:50 [DEBUG] train episode 67: reward = -322.00, steps = 323\n",
      "22:57:51 [DEBUG] train episode 68: reward = -278.00, steps = 279\n",
      "22:57:51 [DEBUG] train episode 69: reward = -337.00, steps = 338\n",
      "22:57:52 [DEBUG] train episode 70: reward = -359.00, steps = 360\n",
      "22:57:53 [DEBUG] train episode 71: reward = -409.00, steps = 410\n",
      "22:57:53 [DEBUG] train episode 72: reward = -283.00, steps = 284\n",
      "22:57:54 [DEBUG] train episode 73: reward = -229.00, steps = 230\n",
      "22:57:55 [DEBUG] train episode 74: reward = -241.00, steps = 242\n",
      "22:57:56 [DEBUG] train episode 75: reward = -363.00, steps = 364\n",
      "22:57:56 [DEBUG] train episode 76: reward = -321.00, steps = 322\n",
      "22:57:57 [DEBUG] train episode 77: reward = -415.00, steps = 416\n",
      "22:57:58 [DEBUG] train episode 78: reward = -500.00, steps = 500\n",
      "22:57:59 [DEBUG] train episode 79: reward = -324.00, steps = 325\n",
      "22:58:00 [DEBUG] train episode 80: reward = -331.00, steps = 332\n",
      "22:58:00 [DEBUG] train episode 81: reward = -218.00, steps = 219\n",
      "22:58:01 [DEBUG] train episode 82: reward = -325.00, steps = 326\n",
      "22:58:02 [DEBUG] train episode 83: reward = -453.00, steps = 454\n",
      "22:58:02 [DEBUG] train episode 84: reward = -212.00, steps = 213\n",
      "22:58:02 [DEBUG] train episode 85: reward = -246.00, steps = 247\n",
      "22:58:03 [DEBUG] train episode 86: reward = -358.00, steps = 359\n",
      "22:58:03 [DEBUG] train episode 87: reward = -145.00, steps = 146\n",
      "22:58:04 [DEBUG] train episode 88: reward = -118.00, steps = 119\n",
      "22:58:04 [DEBUG] train episode 89: reward = -157.00, steps = 158\n",
      "22:58:05 [DEBUG] train episode 90: reward = -228.00, steps = 229\n",
      "22:58:05 [DEBUG] train episode 91: reward = -225.00, steps = 226\n",
      "22:58:05 [DEBUG] train episode 92: reward = -275.00, steps = 276\n",
      "22:58:06 [DEBUG] train episode 93: reward = -254.00, steps = 255\n",
      "22:58:06 [DEBUG] train episode 94: reward = -235.00, steps = 236\n",
      "22:58:07 [DEBUG] train episode 95: reward = -165.00, steps = 166\n",
      "22:58:07 [DEBUG] train episode 96: reward = -199.00, steps = 200\n",
      "22:58:07 [DEBUG] train episode 97: reward = -267.00, steps = 268\n",
      "22:58:08 [DEBUG] train episode 98: reward = -226.00, steps = 227\n",
      "22:58:08 [DEBUG] train episode 99: reward = -168.00, steps = 169\n",
      "22:58:09 [DEBUG] train episode 100: reward = -329.00, steps = 330\n",
      "22:58:09 [DEBUG] train episode 101: reward = -181.00, steps = 182\n",
      "22:58:09 [DEBUG] train episode 102: reward = -194.00, steps = 195\n",
      "22:58:10 [DEBUG] train episode 103: reward = -172.00, steps = 173\n",
      "22:58:10 [DEBUG] train episode 104: reward = -223.00, steps = 224\n",
      "22:58:11 [DEBUG] train episode 105: reward = -252.00, steps = 253\n",
      "22:58:11 [DEBUG] train episode 106: reward = -205.00, steps = 206\n",
      "22:58:11 [DEBUG] train episode 107: reward = -182.00, steps = 183\n",
      "22:58:12 [DEBUG] train episode 108: reward = -180.00, steps = 181\n",
      "22:58:12 [DEBUG] train episode 109: reward = -219.00, steps = 220\n",
      "22:58:13 [DEBUG] train episode 110: reward = -195.00, steps = 196\n",
      "22:58:13 [DEBUG] train episode 111: reward = -213.00, steps = 214\n",
      "22:58:13 [DEBUG] train episode 112: reward = -190.00, steps = 191\n",
      "22:58:14 [DEBUG] train episode 113: reward = -212.00, steps = 213\n",
      "22:58:14 [DEBUG] train episode 114: reward = -212.00, steps = 213\n",
      "22:58:14 [DEBUG] train episode 115: reward = -221.00, steps = 222\n",
      "22:58:14 [DEBUG] train episode 116: reward = -175.00, steps = 176\n",
      "22:58:15 [DEBUG] train episode 117: reward = -241.00, steps = 242\n",
      "22:58:15 [DEBUG] train episode 118: reward = -163.00, steps = 164\n",
      "22:58:15 [DEBUG] train episode 119: reward = -219.00, steps = 220\n",
      "22:58:16 [DEBUG] train episode 120: reward = -222.00, steps = 223\n",
      "22:58:16 [DEBUG] train episode 121: reward = -253.00, steps = 254\n",
      "22:58:17 [DEBUG] train episode 122: reward = -223.00, steps = 224\n",
      "22:58:17 [DEBUG] train episode 123: reward = -210.00, steps = 211\n",
      "22:58:17 [DEBUG] train episode 124: reward = -224.00, steps = 225\n",
      "22:58:18 [DEBUG] train episode 125: reward = -165.00, steps = 166\n",
      "22:58:18 [DEBUG] train episode 126: reward = -268.00, steps = 269\n",
      "22:58:18 [DEBUG] train episode 127: reward = -272.00, steps = 273\n",
      "22:58:19 [DEBUG] train episode 128: reward = -207.00, steps = 208\n",
      "22:58:19 [DEBUG] train episode 129: reward = -218.00, steps = 219\n",
      "22:58:19 [DEBUG] train episode 130: reward = -188.00, steps = 189\n",
      "22:58:20 [DEBUG] train episode 131: reward = -187.00, steps = 188\n",
      "22:58:20 [DEBUG] train episode 132: reward = -301.00, steps = 302\n",
      "22:58:21 [DEBUG] train episode 133: reward = -236.00, steps = 237\n",
      "22:58:21 [DEBUG] train episode 134: reward = -368.00, steps = 369\n",
      "22:58:21 [DEBUG] train episode 135: reward = -199.00, steps = 200\n",
      "22:58:22 [DEBUG] train episode 136: reward = -394.00, steps = 395\n",
      "22:58:22 [DEBUG] train episode 137: reward = -211.00, steps = 212\n",
      "22:58:23 [DEBUG] train episode 138: reward = -182.00, steps = 183\n",
      "22:58:23 [DEBUG] train episode 139: reward = -257.00, steps = 258\n",
      "22:58:23 [DEBUG] train episode 140: reward = -163.00, steps = 164\n",
      "22:58:23 [DEBUG] train episode 141: reward = -206.00, steps = 207\n",
      "22:58:24 [DEBUG] train episode 142: reward = -294.00, steps = 295\n",
      "22:58:24 [DEBUG] train episode 143: reward = -276.00, steps = 277\n",
      "22:58:24 [DEBUG] train episode 144: reward = -163.00, steps = 164\n",
      "22:58:25 [DEBUG] train episode 145: reward = -194.00, steps = 195\n",
      "22:58:25 [DEBUG] train episode 146: reward = -231.00, steps = 232\n",
      "22:58:25 [DEBUG] train episode 147: reward = -281.00, steps = 282\n",
      "22:58:26 [DEBUG] train episode 148: reward = -135.00, steps = 136\n",
      "22:58:26 [DEBUG] train episode 149: reward = -175.00, steps = 176\n",
      "22:58:26 [DEBUG] train episode 150: reward = -264.00, steps = 265\n",
      "22:58:27 [DEBUG] train episode 151: reward = -203.00, steps = 204\n",
      "22:58:27 [DEBUG] train episode 152: reward = -186.00, steps = 187\n",
      "22:58:27 [DEBUG] train episode 153: reward = -349.00, steps = 350\n",
      "22:58:28 [DEBUG] train episode 154: reward = -307.00, steps = 308\n",
      "22:58:28 [DEBUG] train episode 155: reward = -261.00, steps = 262\n",
      "22:58:29 [DEBUG] train episode 156: reward = -278.00, steps = 279\n",
      "22:58:29 [DEBUG] train episode 157: reward = -293.00, steps = 294\n",
      "22:58:30 [DEBUG] train episode 158: reward = -251.00, steps = 252\n",
      "22:58:30 [DEBUG] train episode 159: reward = -236.00, steps = 237\n",
      "22:58:30 [DEBUG] train episode 160: reward = -281.00, steps = 282\n",
      "22:58:31 [DEBUG] train episode 161: reward = -248.00, steps = 249\n",
      "22:58:31 [DEBUG] train episode 162: reward = -206.00, steps = 207\n",
      "22:58:31 [DEBUG] train episode 163: reward = -272.00, steps = 273\n",
      "22:58:32 [DEBUG] train episode 164: reward = -274.00, steps = 275\n",
      "22:58:32 [DEBUG] train episode 165: reward = -180.00, steps = 181\n",
      "22:58:33 [DEBUG] train episode 166: reward = -500.00, steps = 500\n",
      "22:58:33 [DEBUG] train episode 167: reward = -282.00, steps = 283\n",
      "22:58:34 [DEBUG] train episode 168: reward = -243.00, steps = 244\n",
      "22:58:34 [DEBUG] train episode 169: reward = -175.00, steps = 176\n",
      "22:58:34 [DEBUG] train episode 170: reward = -279.00, steps = 280\n",
      "22:58:35 [DEBUG] train episode 171: reward = -198.00, steps = 199\n",
      "22:58:35 [DEBUG] train episode 172: reward = -195.00, steps = 196\n",
      "22:58:35 [DEBUG] train episode 173: reward = -218.00, steps = 219\n",
      "22:58:36 [DEBUG] train episode 174: reward = -328.00, steps = 329\n",
      "22:58:36 [DEBUG] train episode 175: reward = -179.00, steps = 180\n",
      "22:58:36 [DEBUG] train episode 176: reward = -197.00, steps = 198\n",
      "22:58:37 [DEBUG] train episode 177: reward = -299.00, steps = 300\n",
      "22:58:37 [DEBUG] train episode 178: reward = -279.00, steps = 280\n",
      "22:58:38 [DEBUG] train episode 179: reward = -355.00, steps = 356\n",
      "22:58:39 [DEBUG] train episode 180: reward = -325.00, steps = 326\n",
      "22:58:39 [DEBUG] train episode 181: reward = -186.00, steps = 187\n",
      "22:58:39 [DEBUG] train episode 182: reward = -250.00, steps = 251\n",
      "22:58:40 [DEBUG] train episode 183: reward = -286.00, steps = 287\n",
      "22:58:40 [DEBUG] train episode 184: reward = -177.00, steps = 178\n",
      "22:58:41 [DEBUG] train episode 185: reward = -217.00, steps = 218\n",
      "22:58:41 [DEBUG] train episode 186: reward = -223.00, steps = 224\n",
      "22:58:41 [DEBUG] train episode 187: reward = -209.00, steps = 210\n",
      "22:58:42 [DEBUG] train episode 188: reward = -210.00, steps = 211\n",
      "22:58:42 [DEBUG] train episode 189: reward = -237.00, steps = 238\n",
      "22:58:42 [DEBUG] train episode 190: reward = -182.00, steps = 183\n",
      "22:58:43 [DEBUG] train episode 191: reward = -165.00, steps = 166\n",
      "22:58:43 [DEBUG] train episode 192: reward = -204.00, steps = 205\n",
      "22:58:43 [DEBUG] train episode 193: reward = -211.00, steps = 212\n",
      "22:58:44 [DEBUG] train episode 194: reward = -174.00, steps = 175\n",
      "22:58:44 [DEBUG] train episode 195: reward = -183.00, steps = 184\n",
      "22:58:44 [DEBUG] train episode 196: reward = -272.00, steps = 273\n",
      "22:58:45 [DEBUG] train episode 197: reward = -241.00, steps = 242\n",
      "22:58:45 [DEBUG] train episode 198: reward = -257.00, steps = 258\n",
      "22:58:45 [DEBUG] train episode 199: reward = -220.00, steps = 221\n",
      "22:58:46 [DEBUG] train episode 200: reward = -226.00, steps = 227\n",
      "22:58:46 [DEBUG] train episode 201: reward = -235.00, steps = 236\n",
      "22:58:47 [DEBUG] train episode 202: reward = -257.00, steps = 258\n",
      "22:58:47 [DEBUG] train episode 203: reward = -340.00, steps = 341\n",
      "22:58:47 [DEBUG] train episode 204: reward = -202.00, steps = 203\n",
      "22:58:48 [DEBUG] train episode 205: reward = -451.00, steps = 452\n",
      "22:58:48 [DEBUG] train episode 206: reward = -231.00, steps = 232\n",
      "22:58:49 [DEBUG] train episode 207: reward = -192.00, steps = 193\n",
      "22:58:49 [DEBUG] train episode 208: reward = -245.00, steps = 246\n",
      "22:58:49 [DEBUG] train episode 209: reward = -192.00, steps = 193\n",
      "22:58:50 [DEBUG] train episode 210: reward = -231.00, steps = 232\n",
      "22:58:50 [DEBUG] train episode 211: reward = -303.00, steps = 304\n",
      "22:58:50 [DEBUG] train episode 212: reward = -223.00, steps = 224\n",
      "22:58:51 [DEBUG] train episode 213: reward = -190.00, steps = 191\n",
      "22:58:51 [DEBUG] train episode 214: reward = -188.00, steps = 189\n",
      "22:58:51 [DEBUG] train episode 215: reward = -152.00, steps = 153\n",
      "22:58:51 [DEBUG] train episode 216: reward = -157.00, steps = 158\n",
      "22:58:52 [DEBUG] train episode 217: reward = -350.00, steps = 351\n",
      "22:58:52 [DEBUG] train episode 218: reward = -124.00, steps = 125\n",
      "22:58:53 [DEBUG] train episode 219: reward = -349.00, steps = 350\n",
      "22:58:53 [DEBUG] train episode 220: reward = -171.00, steps = 172\n",
      "22:58:53 [DEBUG] train episode 221: reward = -190.00, steps = 191\n",
      "22:58:54 [DEBUG] train episode 222: reward = -325.00, steps = 326\n",
      "22:58:54 [DEBUG] train episode 223: reward = -348.00, steps = 349\n",
      "22:58:54 [DEBUG] train episode 224: reward = -235.00, steps = 236\n",
      "22:58:55 [DEBUG] train episode 225: reward = -180.00, steps = 181\n",
      "22:58:55 [DEBUG] train episode 226: reward = -203.00, steps = 204\n",
      "22:58:55 [DEBUG] train episode 227: reward = -266.00, steps = 267\n",
      "22:58:56 [DEBUG] train episode 228: reward = -217.00, steps = 218\n",
      "22:58:56 [DEBUG] train episode 229: reward = -175.00, steps = 176\n",
      "22:58:56 [DEBUG] train episode 230: reward = -160.00, steps = 161\n",
      "22:58:56 [DEBUG] train episode 231: reward = -134.00, steps = 135\n",
      "22:58:57 [DEBUG] train episode 232: reward = -152.00, steps = 153\n",
      "22:58:57 [DEBUG] train episode 233: reward = -210.00, steps = 211\n",
      "22:58:57 [DEBUG] train episode 234: reward = -216.00, steps = 217\n",
      "22:58:57 [DEBUG] train episode 235: reward = -199.00, steps = 200\n",
      "22:58:58 [DEBUG] train episode 236: reward = -122.00, steps = 123\n",
      "22:58:58 [DEBUG] train episode 237: reward = -132.00, steps = 133\n",
      "22:58:58 [DEBUG] train episode 238: reward = -310.00, steps = 311\n",
      "22:58:58 [DEBUG] train episode 239: reward = -262.00, steps = 263\n",
      "22:58:59 [DEBUG] train episode 240: reward = -196.00, steps = 197\n",
      "22:58:59 [DEBUG] train episode 241: reward = -215.00, steps = 216\n",
      "22:58:59 [DEBUG] train episode 242: reward = -160.00, steps = 161\n",
      "22:59:00 [DEBUG] train episode 243: reward = -174.00, steps = 175\n",
      "22:59:00 [DEBUG] train episode 244: reward = -282.00, steps = 283\n",
      "22:59:00 [DEBUG] train episode 245: reward = -287.00, steps = 288\n",
      "22:59:01 [DEBUG] train episode 246: reward = -222.00, steps = 223\n",
      "22:59:01 [DEBUG] train episode 247: reward = -200.00, steps = 201\n",
      "22:59:01 [DEBUG] train episode 248: reward = -240.00, steps = 241\n",
      "22:59:01 [DEBUG] train episode 249: reward = -259.00, steps = 260\n",
      "22:59:02 [DEBUG] train episode 250: reward = -251.00, steps = 252\n",
      "22:59:02 [DEBUG] train episode 251: reward = -178.00, steps = 179\n",
      "22:59:02 [DEBUG] train episode 252: reward = -176.00, steps = 177\n",
      "22:59:02 [DEBUG] train episode 253: reward = -224.00, steps = 225\n",
      "22:59:03 [DEBUG] train episode 254: reward = -136.00, steps = 137\n",
      "22:59:03 [DEBUG] train episode 255: reward = -159.00, steps = 160\n",
      "22:59:03 [DEBUG] train episode 256: reward = -166.00, steps = 167\n",
      "22:59:03 [DEBUG] train episode 257: reward = -132.00, steps = 133\n",
      "22:59:04 [DEBUG] train episode 258: reward = -251.00, steps = 252\n",
      "22:59:04 [DEBUG] train episode 259: reward = -254.00, steps = 255\n",
      "22:59:04 [DEBUG] train episode 260: reward = -151.00, steps = 152\n",
      "22:59:04 [DEBUG] train episode 261: reward = -145.00, steps = 146\n",
      "22:59:05 [DEBUG] train episode 262: reward = -214.00, steps = 215\n",
      "22:59:05 [DEBUG] train episode 263: reward = -206.00, steps = 207\n",
      "22:59:05 [DEBUG] train episode 264: reward = -174.00, steps = 175\n",
      "22:59:05 [DEBUG] train episode 265: reward = -166.00, steps = 167\n",
      "22:59:06 [DEBUG] train episode 266: reward = -273.00, steps = 274\n",
      "22:59:06 [DEBUG] train episode 267: reward = -244.00, steps = 245\n",
      "22:59:06 [DEBUG] train episode 268: reward = -285.00, steps = 286\n",
      "22:59:06 [DEBUG] train episode 269: reward = -214.00, steps = 215\n",
      "22:59:07 [DEBUG] train episode 270: reward = -168.00, steps = 169\n",
      "22:59:07 [DEBUG] train episode 271: reward = -196.00, steps = 197\n",
      "22:59:07 [DEBUG] train episode 272: reward = -213.00, steps = 214\n",
      "22:59:07 [DEBUG] train episode 273: reward = -191.00, steps = 192\n",
      "22:59:08 [DEBUG] train episode 274: reward = -193.00, steps = 194\n",
      "22:59:08 [DEBUG] train episode 275: reward = -230.00, steps = 231\n",
      "22:59:08 [DEBUG] train episode 276: reward = -260.00, steps = 261\n",
      "22:59:09 [DEBUG] train episode 277: reward = -276.00, steps = 277\n",
      "22:59:09 [DEBUG] train episode 278: reward = -189.00, steps = 190\n",
      "22:59:09 [DEBUG] train episode 279: reward = -246.00, steps = 247\n",
      "22:59:10 [DEBUG] train episode 280: reward = -226.00, steps = 227\n",
      "22:59:10 [DEBUG] train episode 281: reward = -215.00, steps = 216\n",
      "22:59:10 [DEBUG] train episode 282: reward = -241.00, steps = 242\n",
      "22:59:10 [DEBUG] train episode 283: reward = -189.00, steps = 190\n",
      "22:59:11 [DEBUG] train episode 284: reward = -205.00, steps = 206\n",
      "22:59:11 [DEBUG] train episode 285: reward = -248.00, steps = 249\n",
      "22:59:11 [DEBUG] train episode 286: reward = -321.00, steps = 322\n",
      "22:59:12 [DEBUG] train episode 287: reward = -222.00, steps = 223\n",
      "22:59:12 [DEBUG] train episode 288: reward = -208.00, steps = 209\n",
      "22:59:12 [DEBUG] train episode 289: reward = -254.00, steps = 255\n",
      "22:59:12 [DEBUG] train episode 290: reward = -188.00, steps = 189\n",
      "22:59:12 [DEBUG] train episode 291: reward = -189.00, steps = 190\n",
      "22:59:13 [DEBUG] train episode 292: reward = -220.00, steps = 221\n",
      "22:59:13 [DEBUG] train episode 293: reward = -241.00, steps = 242\n",
      "22:59:13 [DEBUG] train episode 294: reward = -231.00, steps = 232\n",
      "22:59:14 [DEBUG] train episode 295: reward = -220.00, steps = 221\n",
      "22:59:14 [DEBUG] train episode 296: reward = -189.00, steps = 190\n",
      "22:59:14 [DEBUG] train episode 297: reward = -147.00, steps = 148\n",
      "22:59:14 [DEBUG] train episode 298: reward = -244.00, steps = 245\n",
      "22:59:15 [DEBUG] train episode 299: reward = -144.00, steps = 145\n",
      "22:59:15 [DEBUG] train episode 300: reward = -210.00, steps = 211\n",
      "22:59:15 [DEBUG] train episode 301: reward = -173.00, steps = 174\n",
      "22:59:15 [DEBUG] train episode 302: reward = -208.00, steps = 209\n",
      "22:59:16 [DEBUG] train episode 303: reward = -141.00, steps = 142\n",
      "22:59:16 [DEBUG] train episode 304: reward = -250.00, steps = 251\n",
      "22:59:16 [DEBUG] train episode 305: reward = -204.00, steps = 205\n",
      "22:59:16 [DEBUG] train episode 306: reward = -175.00, steps = 176\n",
      "22:59:17 [DEBUG] train episode 307: reward = -199.00, steps = 200\n",
      "22:59:17 [DEBUG] train episode 308: reward = -191.00, steps = 192\n",
      "22:59:17 [DEBUG] train episode 309: reward = -249.00, steps = 250\n",
      "22:59:17 [DEBUG] train episode 310: reward = -168.00, steps = 169\n",
      "22:59:18 [DEBUG] train episode 311: reward = -180.00, steps = 181\n",
      "22:59:18 [DEBUG] train episode 312: reward = -173.00, steps = 174\n",
      "22:59:18 [DEBUG] train episode 313: reward = -260.00, steps = 261\n",
      "22:59:19 [DEBUG] train episode 314: reward = -201.00, steps = 202\n",
      "22:59:19 [DEBUG] train episode 315: reward = -161.00, steps = 162\n",
      "22:59:19 [DEBUG] train episode 316: reward = -161.00, steps = 162\n",
      "22:59:19 [DEBUG] train episode 317: reward = -200.00, steps = 201\n",
      "22:59:20 [DEBUG] train episode 318: reward = -171.00, steps = 172\n",
      "22:59:20 [DEBUG] train episode 319: reward = -191.00, steps = 192\n",
      "22:59:20 [DEBUG] train episode 320: reward = -264.00, steps = 265\n",
      "22:59:20 [DEBUG] train episode 321: reward = -175.00, steps = 176\n",
      "22:59:21 [DEBUG] train episode 322: reward = -152.00, steps = 153\n",
      "22:59:21 [DEBUG] train episode 323: reward = -168.00, steps = 169\n",
      "22:59:21 [DEBUG] train episode 324: reward = -235.00, steps = 236\n",
      "22:59:21 [DEBUG] train episode 325: reward = -222.00, steps = 223\n",
      "22:59:22 [DEBUG] train episode 326: reward = -191.00, steps = 192\n",
      "22:59:22 [DEBUG] train episode 327: reward = -185.00, steps = 186\n",
      "22:59:22 [DEBUG] train episode 328: reward = -205.00, steps = 206\n",
      "22:59:22 [DEBUG] train episode 329: reward = -175.00, steps = 176\n",
      "22:59:23 [DEBUG] train episode 330: reward = -190.00, steps = 191\n",
      "22:59:23 [DEBUG] train episode 331: reward = -199.00, steps = 200\n",
      "22:59:23 [DEBUG] train episode 332: reward = -155.00, steps = 156\n",
      "22:59:23 [DEBUG] train episode 333: reward = -218.00, steps = 219\n",
      "22:59:24 [DEBUG] train episode 334: reward = -170.00, steps = 171\n",
      "22:59:24 [DEBUG] train episode 335: reward = -223.00, steps = 224\n",
      "22:59:24 [DEBUG] train episode 336: reward = -184.00, steps = 185\n",
      "22:59:25 [DEBUG] train episode 337: reward = -363.00, steps = 364\n",
      "22:59:25 [DEBUG] train episode 338: reward = -218.00, steps = 219\n",
      "22:59:25 [DEBUG] train episode 339: reward = -162.00, steps = 163\n",
      "22:59:25 [DEBUG] train episode 340: reward = -167.00, steps = 168\n",
      "22:59:26 [DEBUG] train episode 341: reward = -200.00, steps = 201\n",
      "22:59:26 [DEBUG] train episode 342: reward = -295.00, steps = 296\n",
      "22:59:26 [DEBUG] train episode 343: reward = -128.00, steps = 129\n",
      "22:59:26 [DEBUG] train episode 344: reward = -193.00, steps = 194\n",
      "22:59:27 [DEBUG] train episode 345: reward = -221.00, steps = 222\n",
      "22:59:27 [DEBUG] train episode 346: reward = -162.00, steps = 163\n",
      "22:59:27 [DEBUG] train episode 347: reward = -207.00, steps = 208\n",
      "22:59:27 [DEBUG] train episode 348: reward = -164.00, steps = 165\n",
      "22:59:28 [DEBUG] train episode 349: reward = -163.00, steps = 164\n",
      "22:59:28 [DEBUG] train episode 350: reward = -197.00, steps = 198\n",
      "22:59:28 [DEBUG] train episode 351: reward = -162.00, steps = 163\n",
      "22:59:28 [DEBUG] train episode 352: reward = -208.00, steps = 209\n",
      "22:59:29 [DEBUG] train episode 353: reward = -183.00, steps = 184\n",
      "22:59:29 [DEBUG] train episode 354: reward = -179.00, steps = 180\n",
      "22:59:29 [DEBUG] train episode 355: reward = -285.00, steps = 286\n",
      "22:59:29 [DEBUG] train episode 356: reward = -158.00, steps = 159\n",
      "22:59:30 [DEBUG] train episode 357: reward = -194.00, steps = 195\n",
      "22:59:30 [DEBUG] train episode 358: reward = -171.00, steps = 172\n",
      "22:59:30 [DEBUG] train episode 359: reward = -177.00, steps = 178\n",
      "22:59:30 [DEBUG] train episode 360: reward = -194.00, steps = 195\n",
      "22:59:31 [DEBUG] train episode 361: reward = -226.00, steps = 227\n",
      "22:59:31 [DEBUG] train episode 362: reward = -185.00, steps = 186\n",
      "22:59:31 [DEBUG] train episode 363: reward = -191.00, steps = 192\n",
      "22:59:31 [DEBUG] train episode 364: reward = -165.00, steps = 166\n",
      "22:59:32 [DEBUG] train episode 365: reward = -162.00, steps = 163\n",
      "22:59:32 [DEBUG] train episode 366: reward = -190.00, steps = 191\n",
      "22:59:32 [DEBUG] train episode 367: reward = -161.00, steps = 162\n",
      "22:59:32 [DEBUG] train episode 368: reward = -209.00, steps = 210\n",
      "22:59:33 [DEBUG] train episode 369: reward = -422.00, steps = 423\n",
      "22:59:33 [DEBUG] train episode 370: reward = -149.00, steps = 150\n",
      "22:59:33 [DEBUG] train episode 371: reward = -195.00, steps = 196\n",
      "22:59:33 [DEBUG] train episode 372: reward = -192.00, steps = 193\n",
      "22:59:34 [DEBUG] train episode 373: reward = -153.00, steps = 154\n",
      "22:59:34 [DEBUG] train episode 374: reward = -181.00, steps = 182\n",
      "22:59:34 [DEBUG] train episode 375: reward = -155.00, steps = 156\n",
      "22:59:34 [DEBUG] train episode 376: reward = -229.00, steps = 230\n",
      "22:59:35 [DEBUG] train episode 377: reward = -178.00, steps = 179\n",
      "22:59:35 [DEBUG] train episode 378: reward = -188.00, steps = 189\n",
      "22:59:35 [DEBUG] train episode 379: reward = -150.00, steps = 151\n",
      "22:59:35 [DEBUG] train episode 380: reward = -282.00, steps = 283\n",
      "22:59:36 [DEBUG] train episode 381: reward = -160.00, steps = 161\n",
      "22:59:36 [DEBUG] train episode 382: reward = -229.00, steps = 230\n",
      "22:59:36 [DEBUG] train episode 383: reward = -177.00, steps = 178\n",
      "22:59:36 [DEBUG] train episode 384: reward = -246.00, steps = 247\n",
      "22:59:37 [DEBUG] train episode 385: reward = -215.00, steps = 216\n",
      "22:59:37 [DEBUG] train episode 386: reward = -170.00, steps = 171\n",
      "22:59:37 [DEBUG] train episode 387: reward = -195.00, steps = 196\n",
      "22:59:37 [DEBUG] train episode 388: reward = -192.00, steps = 193\n",
      "22:59:38 [DEBUG] train episode 389: reward = -168.00, steps = 169\n",
      "22:59:38 [DEBUG] train episode 390: reward = -166.00, steps = 167\n",
      "22:59:38 [DEBUG] train episode 391: reward = -187.00, steps = 188\n",
      "22:59:38 [DEBUG] train episode 392: reward = -178.00, steps = 179\n",
      "22:59:39 [DEBUG] train episode 393: reward = -183.00, steps = 184\n",
      "22:59:39 [DEBUG] train episode 394: reward = -186.00, steps = 187\n",
      "22:59:39 [DEBUG] train episode 395: reward = -221.00, steps = 222\n",
      "22:59:39 [DEBUG] train episode 396: reward = -159.00, steps = 160\n",
      "22:59:40 [DEBUG] train episode 397: reward = -133.00, steps = 134\n",
      "22:59:40 [DEBUG] train episode 398: reward = -257.00, steps = 258\n",
      "22:59:40 [DEBUG] train episode 399: reward = -266.00, steps = 267\n",
      "22:59:40 [DEBUG] train episode 400: reward = -195.00, steps = 196\n",
      "22:59:41 [DEBUG] train episode 401: reward = -173.00, steps = 174\n",
      "22:59:41 [DEBUG] train episode 402: reward = -144.00, steps = 145\n",
      "22:59:41 [DEBUG] train episode 403: reward = -174.00, steps = 175\n",
      "22:59:41 [DEBUG] train episode 404: reward = -192.00, steps = 193\n",
      "22:59:42 [DEBUG] train episode 405: reward = -178.00, steps = 179\n",
      "22:59:42 [DEBUG] train episode 406: reward = -228.00, steps = 229\n",
      "22:59:42 [DEBUG] train episode 407: reward = -207.00, steps = 208\n",
      "22:59:42 [DEBUG] train episode 408: reward = -156.00, steps = 157\n",
      "22:59:43 [DEBUG] train episode 409: reward = -216.00, steps = 217\n",
      "22:59:43 [DEBUG] train episode 410: reward = -135.00, steps = 136\n",
      "22:59:43 [DEBUG] train episode 411: reward = -194.00, steps = 195\n",
      "22:59:43 [DEBUG] train episode 412: reward = -186.00, steps = 187\n",
      "22:59:44 [DEBUG] train episode 413: reward = -253.00, steps = 254\n",
      "22:59:44 [DEBUG] train episode 414: reward = -268.00, steps = 269\n",
      "22:59:44 [DEBUG] train episode 415: reward = -173.00, steps = 174\n",
      "22:59:45 [DEBUG] train episode 416: reward = -351.00, steps = 352\n",
      "22:59:45 [DEBUG] train episode 417: reward = -204.00, steps = 205\n",
      "22:59:45 [DEBUG] train episode 418: reward = -180.00, steps = 181\n",
      "22:59:46 [DEBUG] train episode 419: reward = -198.00, steps = 199\n",
      "22:59:46 [DEBUG] train episode 420: reward = -206.00, steps = 207\n",
      "22:59:46 [DEBUG] train episode 421: reward = -146.00, steps = 147\n",
      "22:59:46 [DEBUG] train episode 422: reward = -191.00, steps = 192\n",
      "22:59:47 [DEBUG] train episode 423: reward = -222.00, steps = 223\n",
      "22:59:47 [DEBUG] train episode 424: reward = -197.00, steps = 198\n",
      "22:59:47 [DEBUG] train episode 425: reward = -138.00, steps = 139\n",
      "22:59:47 [DEBUG] train episode 426: reward = -209.00, steps = 210\n",
      "22:59:48 [DEBUG] train episode 427: reward = -189.00, steps = 190\n",
      "22:59:48 [DEBUG] train episode 428: reward = -181.00, steps = 182\n",
      "22:59:48 [DEBUG] train episode 429: reward = -165.00, steps = 166\n",
      "22:59:48 [DEBUG] train episode 430: reward = -186.00, steps = 187\n",
      "22:59:49 [DEBUG] train episode 431: reward = -171.00, steps = 172\n",
      "22:59:49 [DEBUG] train episode 432: reward = -175.00, steps = 176\n",
      "22:59:49 [DEBUG] train episode 433: reward = -216.00, steps = 217\n",
      "22:59:49 [DEBUG] train episode 434: reward = -143.00, steps = 144\n",
      "22:59:50 [DEBUG] train episode 435: reward = -151.00, steps = 152\n",
      "22:59:50 [DEBUG] train episode 436: reward = -132.00, steps = 133\n",
      "22:59:50 [DEBUG] train episode 437: reward = -151.00, steps = 152\n",
      "22:59:50 [DEBUG] train episode 438: reward = -232.00, steps = 233\n",
      "22:59:50 [DEBUG] train episode 439: reward = -166.00, steps = 167\n",
      "22:59:51 [DEBUG] train episode 440: reward = -179.00, steps = 180\n",
      "22:59:51 [DEBUG] train episode 441: reward = -166.00, steps = 167\n",
      "22:59:51 [DEBUG] train episode 442: reward = -193.00, steps = 194\n",
      "22:59:51 [DEBUG] train episode 443: reward = -152.00, steps = 153\n",
      "22:59:52 [DEBUG] train episode 444: reward = -291.00, steps = 292\n",
      "22:59:52 [DEBUG] train episode 445: reward = -144.00, steps = 145\n",
      "22:59:52 [DEBUG] train episode 446: reward = -261.00, steps = 262\n",
      "22:59:53 [DEBUG] train episode 447: reward = -178.00, steps = 179\n",
      "22:59:53 [DEBUG] train episode 448: reward = -242.00, steps = 243\n",
      "22:59:53 [DEBUG] train episode 449: reward = -155.00, steps = 156\n",
      "22:59:53 [DEBUG] train episode 450: reward = -188.00, steps = 189\n",
      "22:59:54 [DEBUG] train episode 451: reward = -159.00, steps = 160\n",
      "22:59:54 [DEBUG] train episode 452: reward = -311.00, steps = 312\n",
      "22:59:54 [DEBUG] train episode 453: reward = -188.00, steps = 189\n",
      "22:59:54 [DEBUG] train episode 454: reward = -270.00, steps = 271\n",
      "22:59:55 [DEBUG] train episode 455: reward = -224.00, steps = 225\n",
      "22:59:55 [DEBUG] train episode 456: reward = -122.00, steps = 123\n",
      "22:59:55 [DEBUG] train episode 457: reward = -210.00, steps = 211\n",
      "22:59:55 [DEBUG] train episode 458: reward = -156.00, steps = 157\n",
      "22:59:55 [DEBUG] train episode 459: reward = -147.00, steps = 148\n",
      "22:59:55 [DEBUG] train episode 460: reward = -143.00, steps = 144\n",
      "22:59:56 [DEBUG] train episode 461: reward = -256.00, steps = 257\n",
      "22:59:56 [DEBUG] train episode 462: reward = -158.00, steps = 159\n",
      "22:59:56 [DEBUG] train episode 463: reward = -196.00, steps = 197\n",
      "22:59:56 [DEBUG] train episode 464: reward = -175.00, steps = 176\n",
      "22:59:57 [DEBUG] train episode 465: reward = -196.00, steps = 197\n",
      "22:59:57 [DEBUG] train episode 466: reward = -159.00, steps = 160\n",
      "22:59:57 [DEBUG] train episode 467: reward = -143.00, steps = 144\n",
      "22:59:57 [DEBUG] train episode 468: reward = -192.00, steps = 193\n",
      "22:59:57 [DEBUG] train episode 469: reward = -204.00, steps = 205\n",
      "22:59:58 [DEBUG] train episode 470: reward = -229.00, steps = 230\n",
      "22:59:58 [DEBUG] train episode 471: reward = -153.00, steps = 154\n",
      "22:59:58 [DEBUG] train episode 472: reward = -304.00, steps = 305\n",
      "22:59:58 [DEBUG] train episode 473: reward = -149.00, steps = 150\n",
      "22:59:59 [DEBUG] train episode 474: reward = -162.00, steps = 163\n",
      "22:59:59 [DEBUG] train episode 475: reward = -170.00, steps = 171\n",
      "22:59:59 [DEBUG] train episode 476: reward = -117.00, steps = 118\n",
      "22:59:59 [DEBUG] train episode 477: reward = -153.00, steps = 154\n",
      "22:59:59 [DEBUG] train episode 478: reward = -190.00, steps = 191\n",
      "23:00:00 [DEBUG] train episode 479: reward = -223.00, steps = 224\n",
      "23:00:00 [DEBUG] train episode 480: reward = -183.00, steps = 184\n",
      "23:00:00 [DEBUG] train episode 481: reward = -149.00, steps = 150\n",
      "23:00:00 [DEBUG] train episode 482: reward = -208.00, steps = 209\n",
      "23:00:00 [DEBUG] train episode 483: reward = -176.00, steps = 177\n",
      "23:00:01 [DEBUG] train episode 484: reward = -179.00, steps = 180\n",
      "23:00:01 [DEBUG] train episode 485: reward = -153.00, steps = 154\n",
      "23:00:01 [DEBUG] train episode 486: reward = -200.00, steps = 201\n",
      "23:00:01 [DEBUG] train episode 487: reward = -179.00, steps = 180\n",
      "23:00:01 [DEBUG] train episode 488: reward = -191.00, steps = 192\n",
      "23:00:02 [DEBUG] train episode 489: reward = -167.00, steps = 168\n",
      "23:00:02 [DEBUG] train episode 490: reward = -184.00, steps = 185\n",
      "23:00:02 [DEBUG] train episode 491: reward = -173.00, steps = 174\n",
      "23:00:02 [DEBUG] train episode 492: reward = -195.00, steps = 196\n",
      "23:00:03 [DEBUG] train episode 493: reward = -268.00, steps = 269\n",
      "23:00:03 [DEBUG] train episode 494: reward = -160.00, steps = 161\n",
      "23:00:03 [DEBUG] train episode 495: reward = -181.00, steps = 182\n",
      "23:00:03 [DEBUG] train episode 496: reward = -155.00, steps = 156\n",
      "23:00:03 [DEBUG] train episode 497: reward = -143.00, steps = 144\n",
      "23:00:03 [DEBUG] train episode 498: reward = -166.00, steps = 167\n",
      "23:00:04 [DEBUG] train episode 499: reward = -207.00, steps = 208\n",
      "23:00:04 [DEBUG] train episode 500: reward = -193.00, steps = 194\n",
      "23:00:04 [DEBUG] train episode 501: reward = -194.00, steps = 195\n",
      "23:00:04 [DEBUG] train episode 502: reward = -138.00, steps = 139\n",
      "23:00:05 [DEBUG] train episode 503: reward = -249.00, steps = 250\n",
      "23:00:05 [DEBUG] train episode 504: reward = -289.00, steps = 290\n",
      "23:00:05 [DEBUG] train episode 505: reward = -241.00, steps = 242\n",
      "23:00:05 [DEBUG] train episode 506: reward = -132.00, steps = 133\n",
      "23:00:05 [DEBUG] train episode 507: reward = -150.00, steps = 151\n",
      "23:00:06 [DEBUG] train episode 508: reward = -176.00, steps = 177\n",
      "23:00:06 [DEBUG] train episode 509: reward = -225.00, steps = 226\n",
      "23:00:06 [DEBUG] train episode 510: reward = -261.00, steps = 262\n",
      "23:00:06 [DEBUG] train episode 511: reward = -155.00, steps = 156\n",
      "23:00:07 [DEBUG] train episode 512: reward = -159.00, steps = 160\n",
      "23:00:07 [DEBUG] train episode 513: reward = -172.00, steps = 173\n",
      "23:00:07 [DEBUG] train episode 514: reward = -165.00, steps = 166\n",
      "23:00:07 [DEBUG] train episode 515: reward = -144.00, steps = 145\n",
      "23:00:07 [DEBUG] train episode 516: reward = -156.00, steps = 157\n",
      "23:00:07 [DEBUG] train episode 517: reward = -184.00, steps = 185\n",
      "23:00:08 [DEBUG] train episode 518: reward = -172.00, steps = 173\n",
      "23:00:08 [DEBUG] train episode 519: reward = -296.00, steps = 297\n",
      "23:00:08 [DEBUG] train episode 520: reward = -110.00, steps = 111\n",
      "23:00:08 [DEBUG] train episode 521: reward = -159.00, steps = 160\n",
      "23:00:08 [DEBUG] train episode 522: reward = -190.00, steps = 191\n",
      "23:00:09 [DEBUG] train episode 523: reward = -262.00, steps = 263\n",
      "23:00:09 [DEBUG] train episode 524: reward = -158.00, steps = 159\n",
      "23:00:09 [DEBUG] train episode 525: reward = -151.00, steps = 152\n",
      "23:00:09 [DEBUG] train episode 526: reward = -162.00, steps = 163\n",
      "23:00:09 [DEBUG] train episode 527: reward = -176.00, steps = 177\n",
      "23:00:10 [DEBUG] train episode 528: reward = -190.00, steps = 191\n",
      "23:00:10 [DEBUG] train episode 529: reward = -175.00, steps = 176\n",
      "23:00:10 [DEBUG] train episode 530: reward = -206.00, steps = 207\n",
      "23:00:10 [DEBUG] train episode 531: reward = -244.00, steps = 245\n",
      "23:00:10 [DEBUG] train episode 532: reward = -136.00, steps = 137\n",
      "23:00:11 [DEBUG] train episode 533: reward = -141.00, steps = 142\n",
      "23:00:11 [DEBUG] train episode 534: reward = -133.00, steps = 134\n",
      "23:00:11 [DEBUG] train episode 535: reward = -206.00, steps = 207\n",
      "23:00:11 [DEBUG] train episode 536: reward = -169.00, steps = 170\n",
      "23:00:11 [DEBUG] train episode 537: reward = -178.00, steps = 179\n",
      "23:00:11 [DEBUG] train episode 538: reward = -161.00, steps = 162\n",
      "23:00:12 [DEBUG] train episode 539: reward = -139.00, steps = 140\n",
      "23:00:12 [DEBUG] train episode 540: reward = -192.00, steps = 193\n",
      "23:00:12 [DEBUG] train episode 541: reward = -200.00, steps = 201\n",
      "23:00:12 [DEBUG] train episode 542: reward = -115.00, steps = 116\n",
      "23:00:12 [DEBUG] train episode 543: reward = -163.00, steps = 164\n",
      "23:00:13 [DEBUG] train episode 544: reward = -277.00, steps = 278\n",
      "23:00:13 [DEBUG] train episode 545: reward = -190.00, steps = 191\n",
      "23:00:13 [DEBUG] train episode 546: reward = -179.00, steps = 180\n",
      "23:00:13 [DEBUG] train episode 547: reward = -187.00, steps = 188\n",
      "23:00:14 [DEBUG] train episode 548: reward = -314.00, steps = 315\n",
      "23:00:14 [DEBUG] train episode 549: reward = -187.00, steps = 188\n",
      "23:00:14 [DEBUG] train episode 550: reward = -243.00, steps = 244\n",
      "23:00:15 [DEBUG] train episode 551: reward = -174.00, steps = 175\n",
      "23:00:15 [DEBUG] train episode 552: reward = -197.00, steps = 198\n",
      "23:00:15 [DEBUG] train episode 553: reward = -191.00, steps = 192\n",
      "23:00:15 [DEBUG] train episode 554: reward = -175.00, steps = 176\n",
      "23:00:15 [DEBUG] train episode 555: reward = -184.00, steps = 185\n",
      "23:00:15 [DEBUG] train episode 556: reward = -144.00, steps = 145\n",
      "23:00:16 [DEBUG] train episode 557: reward = -205.00, steps = 206\n",
      "23:00:16 [DEBUG] train episode 558: reward = -152.00, steps = 153\n",
      "23:00:16 [DEBUG] train episode 559: reward = -165.00, steps = 166\n",
      "23:00:16 [DEBUG] train episode 560: reward = -173.00, steps = 174\n",
      "23:00:16 [DEBUG] train episode 561: reward = -184.00, steps = 185\n",
      "23:00:17 [DEBUG] train episode 562: reward = -167.00, steps = 168\n",
      "23:00:17 [DEBUG] train episode 563: reward = -221.00, steps = 222\n",
      "23:00:17 [DEBUG] train episode 564: reward = -184.00, steps = 185\n",
      "23:00:17 [DEBUG] train episode 565: reward = -188.00, steps = 189\n",
      "23:00:18 [DEBUG] train episode 566: reward = -181.00, steps = 182\n",
      "23:00:18 [DEBUG] train episode 567: reward = -170.00, steps = 171\n",
      "23:00:18 [DEBUG] train episode 568: reward = -189.00, steps = 190\n",
      "23:00:18 [DEBUG] train episode 569: reward = -159.00, steps = 160\n",
      "23:00:18 [DEBUG] train episode 570: reward = -186.00, steps = 187\n",
      "23:00:19 [DEBUG] train episode 571: reward = -168.00, steps = 169\n",
      "23:00:19 [DEBUG] train episode 572: reward = -140.00, steps = 141\n",
      "23:00:19 [DEBUG] train episode 573: reward = -194.00, steps = 195\n",
      "23:00:19 [DEBUG] train episode 574: reward = -220.00, steps = 221\n",
      "23:00:19 [DEBUG] train episode 575: reward = -168.00, steps = 169\n",
      "23:00:20 [DEBUG] train episode 576: reward = -156.00, steps = 157\n",
      "23:00:20 [DEBUG] train episode 577: reward = -196.00, steps = 197\n",
      "23:00:20 [DEBUG] train episode 578: reward = -170.00, steps = 171\n",
      "23:00:20 [DEBUG] train episode 579: reward = -181.00, steps = 182\n",
      "23:00:21 [DEBUG] train episode 580: reward = -364.00, steps = 365\n",
      "23:00:21 [DEBUG] train episode 581: reward = -215.00, steps = 216\n",
      "23:00:21 [DEBUG] train episode 582: reward = -166.00, steps = 167\n",
      "23:00:21 [DEBUG] train episode 583: reward = -183.00, steps = 184\n",
      "23:00:21 [DEBUG] train episode 584: reward = -164.00, steps = 165\n",
      "23:00:22 [DEBUG] train episode 585: reward = -131.00, steps = 132\n",
      "23:00:22 [DEBUG] train episode 586: reward = -235.00, steps = 236\n",
      "23:00:22 [DEBUG] train episode 587: reward = -202.00, steps = 203\n",
      "23:00:22 [DEBUG] train episode 588: reward = -170.00, steps = 171\n",
      "23:00:22 [DEBUG] train episode 589: reward = -151.00, steps = 152\n",
      "23:00:23 [DEBUG] train episode 590: reward = -205.00, steps = 206\n",
      "23:00:23 [DEBUG] train episode 591: reward = -168.00, steps = 169\n",
      "23:00:23 [DEBUG] train episode 592: reward = -179.00, steps = 180\n",
      "23:00:23 [DEBUG] train episode 593: reward = -216.00, steps = 217\n",
      "23:00:23 [DEBUG] train episode 594: reward = -181.00, steps = 182\n",
      "23:00:24 [DEBUG] train episode 595: reward = -149.00, steps = 150\n",
      "23:00:24 [DEBUG] train episode 596: reward = -138.00, steps = 139\n",
      "23:00:24 [DEBUG] train episode 597: reward = -160.00, steps = 161\n",
      "23:00:24 [DEBUG] train episode 598: reward = -163.00, steps = 164\n",
      "23:00:24 [DEBUG] train episode 599: reward = -179.00, steps = 180\n",
      "23:00:25 [DEBUG] train episode 600: reward = -191.00, steps = 192\n",
      "23:00:25 [DEBUG] train episode 601: reward = -202.00, steps = 203\n",
      "23:00:25 [DEBUG] train episode 602: reward = -247.00, steps = 248\n",
      "23:00:25 [DEBUG] train episode 603: reward = -252.00, steps = 253\n",
      "23:00:25 [DEBUG] train episode 604: reward = -209.00, steps = 210\n",
      "23:00:26 [DEBUG] train episode 605: reward = -168.00, steps = 169\n",
      "23:00:26 [DEBUG] train episode 606: reward = -226.00, steps = 227\n",
      "23:00:26 [DEBUG] train episode 607: reward = -153.00, steps = 154\n",
      "23:00:26 [DEBUG] train episode 608: reward = -130.00, steps = 131\n",
      "23:00:27 [DEBUG] train episode 609: reward = -184.00, steps = 185\n",
      "23:00:27 [DEBUG] train episode 610: reward = -179.00, steps = 180\n",
      "23:00:27 [DEBUG] train episode 611: reward = -165.00, steps = 166\n",
      "23:00:27 [DEBUG] train episode 612: reward = -326.00, steps = 327\n",
      "23:00:27 [DEBUG] train episode 613: reward = -200.00, steps = 201\n",
      "23:00:28 [DEBUG] train episode 614: reward = -139.00, steps = 140\n",
      "23:00:28 [DEBUG] train episode 615: reward = -230.00, steps = 231\n",
      "23:00:28 [DEBUG] train episode 616: reward = -181.00, steps = 182\n",
      "23:00:28 [DEBUG] train episode 617: reward = -190.00, steps = 191\n",
      "23:00:28 [DEBUG] train episode 618: reward = -137.00, steps = 138\n",
      "23:00:29 [DEBUG] train episode 619: reward = -183.00, steps = 184\n",
      "23:00:29 [DEBUG] train episode 620: reward = -210.00, steps = 211\n",
      "23:00:29 [DEBUG] train episode 621: reward = -151.00, steps = 152\n",
      "23:00:29 [DEBUG] train episode 622: reward = -295.00, steps = 296\n",
      "23:00:30 [DEBUG] train episode 623: reward = -227.00, steps = 228\n",
      "23:00:30 [DEBUG] train episode 624: reward = -120.00, steps = 121\n",
      "23:00:30 [DEBUG] train episode 625: reward = -201.00, steps = 202\n",
      "23:00:30 [DEBUG] train episode 626: reward = -210.00, steps = 211\n",
      "23:00:30 [DEBUG] train episode 627: reward = -140.00, steps = 141\n",
      "23:00:30 [DEBUG] train episode 628: reward = -158.00, steps = 159\n",
      "23:00:31 [DEBUG] train episode 629: reward = -137.00, steps = 138\n",
      "23:00:31 [DEBUG] train episode 630: reward = -236.00, steps = 237\n",
      "23:00:31 [DEBUG] train episode 631: reward = -172.00, steps = 173\n",
      "23:00:31 [DEBUG] train episode 632: reward = -297.00, steps = 298\n",
      "23:00:32 [DEBUG] train episode 633: reward = -126.00, steps = 127\n",
      "23:00:32 [DEBUG] train episode 634: reward = -172.00, steps = 173\n",
      "23:00:32 [DEBUG] train episode 635: reward = -253.00, steps = 254\n",
      "23:00:32 [DEBUG] train episode 636: reward = -202.00, steps = 203\n",
      "23:00:32 [DEBUG] train episode 637: reward = -203.00, steps = 204\n",
      "23:00:33 [DEBUG] train episode 638: reward = -179.00, steps = 180\n",
      "23:00:33 [DEBUG] train episode 639: reward = -169.00, steps = 170\n",
      "23:00:33 [DEBUG] train episode 640: reward = -166.00, steps = 167\n",
      "23:00:33 [DEBUG] train episode 641: reward = -167.00, steps = 168\n",
      "23:00:34 [DEBUG] train episode 642: reward = -272.00, steps = 273\n",
      "23:00:34 [DEBUG] train episode 643: reward = -165.00, steps = 166\n",
      "23:00:34 [DEBUG] train episode 644: reward = -153.00, steps = 154\n",
      "23:00:34 [DEBUG] train episode 645: reward = -165.00, steps = 166\n",
      "23:00:34 [DEBUG] train episode 646: reward = -225.00, steps = 226\n",
      "23:00:35 [DEBUG] train episode 647: reward = -172.00, steps = 173\n",
      "23:00:35 [DEBUG] train episode 648: reward = -204.00, steps = 205\n",
      "23:00:35 [DEBUG] train episode 649: reward = -142.00, steps = 143\n",
      "23:00:35 [DEBUG] train episode 650: reward = -180.00, steps = 181\n",
      "23:00:35 [DEBUG] train episode 651: reward = -194.00, steps = 195\n",
      "23:00:36 [DEBUG] train episode 652: reward = -195.00, steps = 196\n",
      "23:00:36 [DEBUG] train episode 653: reward = -184.00, steps = 185\n",
      "23:00:36 [DEBUG] train episode 654: reward = -114.00, steps = 115\n",
      "23:00:36 [DEBUG] train episode 655: reward = -167.00, steps = 168\n",
      "23:00:36 [DEBUG] train episode 656: reward = -201.00, steps = 202\n",
      "23:00:37 [DEBUG] train episode 657: reward = -154.00, steps = 155\n",
      "23:00:37 [DEBUG] train episode 658: reward = -124.00, steps = 125\n",
      "23:00:37 [DEBUG] train episode 659: reward = -167.00, steps = 168\n",
      "23:00:37 [DEBUG] train episode 660: reward = -175.00, steps = 176\n",
      "23:00:37 [DEBUG] train episode 661: reward = -229.00, steps = 230\n",
      "23:00:37 [DEBUG] train episode 662: reward = -158.00, steps = 159\n",
      "23:00:38 [DEBUG] train episode 663: reward = -196.00, steps = 197\n",
      "23:00:38 [DEBUG] train episode 664: reward = -201.00, steps = 202\n",
      "23:00:38 [DEBUG] train episode 665: reward = -211.00, steps = 212\n",
      "23:00:38 [DEBUG] train episode 666: reward = -195.00, steps = 196\n",
      "23:00:39 [DEBUG] train episode 667: reward = -196.00, steps = 197\n",
      "23:00:39 [DEBUG] train episode 668: reward = -257.00, steps = 258\n",
      "23:00:39 [DEBUG] train episode 669: reward = -135.00, steps = 136\n",
      "23:00:39 [DEBUG] train episode 670: reward = -214.00, steps = 215\n",
      "23:00:39 [DEBUG] train episode 671: reward = -176.00, steps = 177\n",
      "23:00:40 [DEBUG] train episode 672: reward = -162.00, steps = 163\n",
      "23:00:40 [DEBUG] train episode 673: reward = -175.00, steps = 176\n",
      "23:00:40 [DEBUG] train episode 674: reward = -200.00, steps = 201\n",
      "23:00:40 [DEBUG] train episode 675: reward = -191.00, steps = 192\n",
      "23:00:40 [DEBUG] train episode 676: reward = -128.00, steps = 129\n",
      "23:00:41 [DEBUG] train episode 677: reward = -143.00, steps = 144\n",
      "23:00:41 [DEBUG] train episode 678: reward = -187.00, steps = 188\n",
      "23:00:41 [DEBUG] train episode 679: reward = -129.00, steps = 130\n",
      "23:00:41 [DEBUG] train episode 680: reward = -175.00, steps = 176\n",
      "23:00:41 [DEBUG] train episode 681: reward = -171.00, steps = 172\n",
      "23:00:42 [DEBUG] train episode 682: reward = -324.00, steps = 325\n",
      "23:00:42 [DEBUG] train episode 683: reward = -131.00, steps = 132\n",
      "23:00:42 [DEBUG] train episode 684: reward = -152.00, steps = 153\n",
      "23:00:42 [DEBUG] train episode 685: reward = -181.00, steps = 182\n",
      "23:00:42 [DEBUG] train episode 686: reward = -144.00, steps = 145\n",
      "23:00:43 [DEBUG] train episode 687: reward = -200.00, steps = 201\n",
      "23:00:43 [DEBUG] train episode 688: reward = -209.00, steps = 210\n",
      "23:00:43 [DEBUG] train episode 689: reward = -152.00, steps = 153\n",
      "23:00:43 [DEBUG] train episode 690: reward = -207.00, steps = 208\n",
      "23:00:43 [DEBUG] train episode 691: reward = -130.00, steps = 131\n",
      "23:00:44 [DEBUG] train episode 692: reward = -169.00, steps = 170\n",
      "23:00:44 [DEBUG] train episode 693: reward = -180.00, steps = 181\n",
      "23:00:44 [DEBUG] train episode 694: reward = -324.00, steps = 325\n",
      "23:00:44 [DEBUG] train episode 695: reward = -175.00, steps = 176\n",
      "23:00:44 [DEBUG] train episode 696: reward = -191.00, steps = 192\n",
      "23:00:45 [DEBUG] train episode 697: reward = -201.00, steps = 202\n",
      "23:00:45 [DEBUG] train episode 698: reward = -180.00, steps = 181\n",
      "23:00:45 [DEBUG] train episode 699: reward = -172.00, steps = 173\n",
      "23:00:45 [DEBUG] train episode 700: reward = -181.00, steps = 182\n",
      "23:00:45 [DEBUG] train episode 701: reward = -140.00, steps = 141\n",
      "23:00:46 [DEBUG] train episode 702: reward = -149.00, steps = 150\n",
      "23:00:46 [DEBUG] train episode 703: reward = -291.00, steps = 292\n",
      "23:00:46 [DEBUG] train episode 704: reward = -188.00, steps = 189\n",
      "23:00:46 [DEBUG] train episode 705: reward = -210.00, steps = 211\n",
      "23:00:46 [DEBUG] train episode 706: reward = -169.00, steps = 170\n",
      "23:00:47 [DEBUG] train episode 707: reward = -167.00, steps = 168\n",
      "23:00:47 [DEBUG] train episode 708: reward = -175.00, steps = 176\n",
      "23:00:47 [DEBUG] train episode 709: reward = -145.00, steps = 146\n",
      "23:00:47 [DEBUG] train episode 710: reward = -168.00, steps = 169\n",
      "23:00:47 [DEBUG] train episode 711: reward = -142.00, steps = 143\n",
      "23:00:48 [DEBUG] train episode 712: reward = -151.00, steps = 152\n",
      "23:00:48 [DEBUG] train episode 713: reward = -118.00, steps = 119\n",
      "23:00:48 [DEBUG] train episode 714: reward = -255.00, steps = 256\n",
      "23:00:48 [DEBUG] train episode 715: reward = -249.00, steps = 250\n",
      "23:00:48 [DEBUG] train episode 716: reward = -155.00, steps = 156\n",
      "23:00:49 [DEBUG] train episode 717: reward = -163.00, steps = 164\n",
      "23:00:49 [DEBUG] train episode 718: reward = -167.00, steps = 168\n",
      "23:00:49 [DEBUG] train episode 719: reward = -163.00, steps = 164\n",
      "23:00:49 [DEBUG] train episode 720: reward = -141.00, steps = 142\n",
      "23:00:49 [DEBUG] train episode 721: reward = -148.00, steps = 149\n",
      "23:00:50 [DEBUG] train episode 722: reward = -187.00, steps = 188\n",
      "23:00:50 [DEBUG] train episode 723: reward = -177.00, steps = 178\n",
      "23:00:50 [DEBUG] train episode 724: reward = -183.00, steps = 184\n",
      "23:00:50 [DEBUG] train episode 725: reward = -221.00, steps = 222\n",
      "23:00:51 [DEBUG] train episode 726: reward = -189.00, steps = 190\n",
      "23:00:51 [DEBUG] train episode 727: reward = -167.00, steps = 168\n",
      "23:00:51 [DEBUG] train episode 728: reward = -140.00, steps = 141\n",
      "23:00:51 [DEBUG] train episode 729: reward = -181.00, steps = 182\n",
      "23:00:51 [DEBUG] train episode 730: reward = -214.00, steps = 215\n",
      "23:00:52 [DEBUG] train episode 731: reward = -192.00, steps = 193\n",
      "23:00:52 [DEBUG] train episode 732: reward = -210.00, steps = 211\n",
      "23:00:52 [DEBUG] train episode 733: reward = -155.00, steps = 156\n",
      "23:00:52 [DEBUG] train episode 734: reward = -174.00, steps = 175\n",
      "23:00:52 [DEBUG] train episode 735: reward = -163.00, steps = 164\n",
      "23:00:53 [DEBUG] train episode 736: reward = -161.00, steps = 162\n",
      "23:00:53 [DEBUG] train episode 737: reward = -156.00, steps = 157\n",
      "23:00:53 [DEBUG] train episode 738: reward = -136.00, steps = 137\n",
      "23:00:53 [DEBUG] train episode 739: reward = -197.00, steps = 198\n",
      "23:00:53 [DEBUG] train episode 740: reward = -197.00, steps = 198\n",
      "23:00:54 [DEBUG] train episode 741: reward = -131.00, steps = 132\n",
      "23:00:54 [DEBUG] train episode 742: reward = -171.00, steps = 172\n",
      "23:00:54 [DEBUG] train episode 743: reward = -117.00, steps = 118\n",
      "23:00:54 [DEBUG] train episode 744: reward = -171.00, steps = 172\n",
      "23:00:54 [DEBUG] train episode 745: reward = -133.00, steps = 134\n",
      "23:00:54 [DEBUG] train episode 746: reward = -186.00, steps = 187\n",
      "23:00:55 [DEBUG] train episode 747: reward = -139.00, steps = 140\n",
      "23:00:55 [DEBUG] train episode 748: reward = -174.00, steps = 175\n",
      "23:00:55 [DEBUG] train episode 749: reward = -206.00, steps = 207\n",
      "23:00:55 [DEBUG] train episode 750: reward = -158.00, steps = 159\n",
      "23:00:55 [DEBUG] train episode 751: reward = -235.00, steps = 236\n",
      "23:00:55 [DEBUG] train episode 752: reward = -160.00, steps = 161\n",
      "23:00:56 [DEBUG] train episode 753: reward = -183.00, steps = 184\n",
      "23:00:56 [DEBUG] train episode 754: reward = -181.00, steps = 182\n",
      "23:00:56 [DEBUG] train episode 755: reward = -167.00, steps = 168\n",
      "23:00:56 [DEBUG] train episode 756: reward = -181.00, steps = 182\n",
      "23:00:57 [DEBUG] train episode 757: reward = -500.00, steps = 500\n",
      "23:00:57 [DEBUG] train episode 758: reward = -178.00, steps = 179\n",
      "23:00:57 [DEBUG] train episode 759: reward = -154.00, steps = 155\n",
      "23:00:57 [DEBUG] train episode 760: reward = -176.00, steps = 177\n",
      "23:00:58 [DEBUG] train episode 761: reward = -221.00, steps = 222\n",
      "23:00:58 [DEBUG] train episode 762: reward = -145.00, steps = 146\n",
      "23:00:58 [DEBUG] train episode 763: reward = -175.00, steps = 176\n",
      "23:00:58 [DEBUG] train episode 764: reward = -135.00, steps = 136\n",
      "23:00:58 [DEBUG] train episode 765: reward = -131.00, steps = 132\n",
      "23:00:59 [DEBUG] train episode 766: reward = -177.00, steps = 178\n",
      "23:00:59 [DEBUG] train episode 767: reward = -183.00, steps = 184\n",
      "23:00:59 [DEBUG] train episode 768: reward = -190.00, steps = 191\n",
      "23:00:59 [DEBUG] train episode 769: reward = -154.00, steps = 155\n",
      "23:00:59 [DEBUG] train episode 770: reward = -126.00, steps = 127\n",
      "23:01:00 [DEBUG] train episode 771: reward = -174.00, steps = 175\n",
      "23:01:00 [DEBUG] train episode 772: reward = -203.00, steps = 204\n",
      "23:01:00 [DEBUG] train episode 773: reward = -164.00, steps = 165\n",
      "23:01:00 [DEBUG] train episode 774: reward = -155.00, steps = 156\n",
      "23:01:00 [DEBUG] train episode 775: reward = -123.00, steps = 124\n",
      "23:01:01 [DEBUG] train episode 776: reward = -165.00, steps = 166\n",
      "23:01:01 [DEBUG] train episode 777: reward = -128.00, steps = 129\n",
      "23:01:01 [DEBUG] train episode 778: reward = -178.00, steps = 179\n",
      "23:01:01 [DEBUG] train episode 779: reward = -175.00, steps = 176\n",
      "23:01:01 [DEBUG] train episode 780: reward = -200.00, steps = 201\n",
      "23:01:01 [DEBUG] train episode 781: reward = -155.00, steps = 156\n",
      "23:01:02 [DEBUG] train episode 782: reward = -148.00, steps = 149\n",
      "23:01:02 [DEBUG] train episode 783: reward = -204.00, steps = 205\n",
      "23:01:02 [DEBUG] train episode 784: reward = -149.00, steps = 150\n",
      "23:01:02 [DEBUG] train episode 785: reward = -212.00, steps = 213\n",
      "23:01:02 [DEBUG] train episode 786: reward = -149.00, steps = 150\n",
      "23:01:03 [DEBUG] train episode 787: reward = -150.00, steps = 151\n",
      "23:01:03 [DEBUG] train episode 788: reward = -170.00, steps = 171\n",
      "23:01:03 [DEBUG] train episode 789: reward = -202.00, steps = 203\n",
      "23:01:03 [DEBUG] train episode 790: reward = -233.00, steps = 234\n",
      "23:01:03 [DEBUG] train episode 791: reward = -162.00, steps = 163\n",
      "23:01:04 [DEBUG] train episode 792: reward = -193.00, steps = 194\n",
      "23:01:04 [DEBUG] train episode 793: reward = -187.00, steps = 188\n",
      "23:01:04 [DEBUG] train episode 794: reward = -167.00, steps = 168\n",
      "23:01:04 [DEBUG] train episode 795: reward = -159.00, steps = 160\n",
      "23:01:04 [DEBUG] train episode 796: reward = -166.00, steps = 167\n",
      "23:01:04 [DEBUG] train episode 797: reward = -152.00, steps = 153\n",
      "23:01:05 [DEBUG] train episode 798: reward = -235.00, steps = 236\n",
      "23:01:05 [DEBUG] train episode 799: reward = -175.00, steps = 176\n",
      "23:01:05 [DEBUG] train episode 800: reward = -184.00, steps = 185\n",
      "23:01:05 [DEBUG] train episode 801: reward = -191.00, steps = 192\n",
      "23:01:05 [DEBUG] train episode 802: reward = -140.00, steps = 141\n",
      "23:01:06 [DEBUG] train episode 803: reward = -115.00, steps = 116\n",
      "23:01:06 [DEBUG] train episode 804: reward = -170.00, steps = 171\n",
      "23:01:06 [DEBUG] train episode 805: reward = -161.00, steps = 162\n",
      "23:01:06 [DEBUG] train episode 806: reward = -183.00, steps = 184\n",
      "23:01:06 [DEBUG] train episode 807: reward = -158.00, steps = 159\n",
      "23:01:07 [DEBUG] train episode 808: reward = -193.00, steps = 194\n",
      "23:01:07 [DEBUG] train episode 809: reward = -147.00, steps = 148\n",
      "23:01:07 [DEBUG] train episode 810: reward = -197.00, steps = 198\n",
      "23:01:07 [DEBUG] train episode 811: reward = -166.00, steps = 167\n",
      "23:01:07 [DEBUG] train episode 812: reward = -144.00, steps = 145\n",
      "23:01:07 [DEBUG] train episode 813: reward = -141.00, steps = 142\n",
      "23:01:08 [DEBUG] train episode 814: reward = -121.00, steps = 122\n",
      "23:01:08 [DEBUG] train episode 815: reward = -165.00, steps = 166\n",
      "23:01:08 [DEBUG] train episode 816: reward = -192.00, steps = 193\n",
      "23:01:08 [DEBUG] train episode 817: reward = -169.00, steps = 170\n",
      "23:01:08 [DEBUG] train episode 818: reward = -222.00, steps = 223\n",
      "23:01:09 [DEBUG] train episode 819: reward = -181.00, steps = 182\n",
      "23:01:09 [DEBUG] train episode 820: reward = -161.00, steps = 162\n",
      "23:01:09 [DEBUG] train episode 821: reward = -133.00, steps = 134\n",
      "23:01:09 [DEBUG] train episode 822: reward = -174.00, steps = 175\n",
      "23:01:09 [DEBUG] train episode 823: reward = -154.00, steps = 155\n",
      "23:01:09 [DEBUG] train episode 824: reward = -165.00, steps = 166\n",
      "23:01:10 [DEBUG] train episode 825: reward = -214.00, steps = 215\n",
      "23:01:10 [DEBUG] train episode 826: reward = -177.00, steps = 178\n",
      "23:01:10 [DEBUG] train episode 827: reward = -201.00, steps = 202\n",
      "23:01:10 [DEBUG] train episode 828: reward = -276.00, steps = 277\n",
      "23:01:10 [DEBUG] train episode 829: reward = -212.00, steps = 213\n",
      "23:01:11 [DEBUG] train episode 830: reward = -132.00, steps = 133\n",
      "23:01:11 [DEBUG] train episode 831: reward = -137.00, steps = 138\n",
      "23:01:11 [DEBUG] train episode 832: reward = -164.00, steps = 165\n",
      "23:01:11 [DEBUG] train episode 833: reward = -122.00, steps = 123\n",
      "23:01:11 [DEBUG] train episode 834: reward = -176.00, steps = 177\n",
      "23:01:11 [DEBUG] train episode 835: reward = -161.00, steps = 162\n",
      "23:01:12 [DEBUG] train episode 836: reward = -132.00, steps = 133\n",
      "23:01:12 [DEBUG] train episode 837: reward = -194.00, steps = 195\n",
      "23:01:12 [DEBUG] train episode 838: reward = -162.00, steps = 163\n",
      "23:01:12 [DEBUG] train episode 839: reward = -127.00, steps = 128\n",
      "23:01:12 [DEBUG] train episode 840: reward = -249.00, steps = 250\n",
      "23:01:13 [DEBUG] train episode 841: reward = -132.00, steps = 133\n",
      "23:01:13 [DEBUG] train episode 842: reward = -195.00, steps = 196\n",
      "23:01:13 [DEBUG] train episode 843: reward = -167.00, steps = 168\n",
      "23:01:13 [DEBUG] train episode 844: reward = -189.00, steps = 190\n",
      "23:01:13 [DEBUG] train episode 845: reward = -165.00, steps = 166\n",
      "23:01:14 [DEBUG] train episode 846: reward = -228.00, steps = 229\n",
      "23:01:14 [DEBUG] train episode 847: reward = -199.00, steps = 200\n",
      "23:01:14 [DEBUG] train episode 848: reward = -162.00, steps = 163\n",
      "23:01:14 [DEBUG] train episode 849: reward = -153.00, steps = 154\n",
      "23:01:14 [DEBUG] train episode 850: reward = -164.00, steps = 165\n",
      "23:01:14 [DEBUG] train episode 851: reward = -232.00, steps = 233\n",
      "23:01:15 [DEBUG] train episode 852: reward = -149.00, steps = 150\n",
      "23:01:15 [DEBUG] train episode 853: reward = -251.00, steps = 252\n",
      "23:01:15 [DEBUG] train episode 854: reward = -141.00, steps = 142\n",
      "23:01:15 [DEBUG] train episode 855: reward = -158.00, steps = 159\n",
      "23:01:15 [DEBUG] train episode 856: reward = -165.00, steps = 166\n",
      "23:01:15 [DEBUG] train episode 857: reward = -162.00, steps = 163\n",
      "23:01:16 [DEBUG] train episode 858: reward = -117.00, steps = 118\n",
      "23:01:16 [DEBUG] train episode 859: reward = -173.00, steps = 174\n",
      "23:01:16 [DEBUG] train episode 860: reward = -177.00, steps = 178\n",
      "23:01:16 [DEBUG] train episode 861: reward = -133.00, steps = 134\n",
      "23:01:16 [DEBUG] train episode 862: reward = -129.00, steps = 130\n",
      "23:01:16 [DEBUG] train episode 863: reward = -194.00, steps = 195\n",
      "23:01:17 [DEBUG] train episode 864: reward = -143.00, steps = 144\n",
      "23:01:17 [DEBUG] train episode 865: reward = -196.00, steps = 197\n",
      "23:01:17 [DEBUG] train episode 866: reward = -155.00, steps = 156\n",
      "23:01:17 [DEBUG] train episode 867: reward = -176.00, steps = 177\n",
      "23:01:17 [DEBUG] train episode 868: reward = -147.00, steps = 148\n",
      "23:01:17 [DEBUG] train episode 869: reward = -164.00, steps = 165\n",
      "23:01:18 [DEBUG] train episode 870: reward = -359.00, steps = 360\n",
      "23:01:18 [DEBUG] train episode 871: reward = -145.00, steps = 146\n",
      "23:01:18 [DEBUG] train episode 872: reward = -165.00, steps = 166\n",
      "23:01:18 [DEBUG] train episode 873: reward = -148.00, steps = 149\n",
      "23:01:19 [DEBUG] train episode 874: reward = -212.00, steps = 213\n",
      "23:01:19 [DEBUG] train episode 875: reward = -199.00, steps = 200\n",
      "23:01:19 [DEBUG] train episode 876: reward = -219.00, steps = 220\n",
      "23:01:19 [DEBUG] train episode 877: reward = -171.00, steps = 172\n",
      "23:01:19 [DEBUG] train episode 878: reward = -147.00, steps = 148\n",
      "23:01:19 [DEBUG] train episode 879: reward = -232.00, steps = 233\n",
      "23:01:20 [DEBUG] train episode 880: reward = -148.00, steps = 149\n",
      "23:01:20 [DEBUG] train episode 881: reward = -139.00, steps = 140\n",
      "23:01:20 [DEBUG] train episode 882: reward = -226.00, steps = 227\n",
      "23:01:20 [DEBUG] train episode 883: reward = -151.00, steps = 152\n",
      "23:01:20 [DEBUG] train episode 884: reward = -205.00, steps = 206\n",
      "23:01:20 [DEBUG] train episode 885: reward = -124.00, steps = 125\n",
      "23:01:21 [DEBUG] train episode 886: reward = -197.00, steps = 198\n",
      "23:01:21 [DEBUG] train episode 887: reward = -211.00, steps = 212\n",
      "23:01:21 [DEBUG] train episode 888: reward = -165.00, steps = 166\n",
      "23:01:21 [DEBUG] train episode 889: reward = -172.00, steps = 173\n",
      "23:01:21 [DEBUG] train episode 890: reward = -174.00, steps = 175\n",
      "23:01:22 [DEBUG] train episode 891: reward = -179.00, steps = 180\n",
      "23:01:22 [DEBUG] train episode 892: reward = -160.00, steps = 161\n",
      "23:01:22 [DEBUG] train episode 893: reward = -192.00, steps = 193\n",
      "23:01:22 [DEBUG] train episode 894: reward = -142.00, steps = 143\n",
      "23:01:22 [DEBUG] train episode 895: reward = -208.00, steps = 209\n",
      "23:01:23 [DEBUG] train episode 896: reward = -500.00, steps = 500\n",
      "23:01:23 [DEBUG] train episode 897: reward = -163.00, steps = 164\n",
      "23:01:23 [DEBUG] train episode 898: reward = -155.00, steps = 156\n",
      "23:01:23 [DEBUG] train episode 899: reward = -103.00, steps = 104\n",
      "23:01:23 [DEBUG] train episode 900: reward = -245.00, steps = 246\n",
      "23:01:24 [DEBUG] train episode 901: reward = -152.00, steps = 153\n",
      "23:01:24 [DEBUG] train episode 902: reward = -168.00, steps = 169\n",
      "23:01:24 [DEBUG] train episode 903: reward = -142.00, steps = 143\n",
      "23:01:24 [DEBUG] train episode 904: reward = -213.00, steps = 214\n",
      "23:01:24 [DEBUG] train episode 905: reward = -154.00, steps = 155\n",
      "23:01:24 [DEBUG] train episode 906: reward = -161.00, steps = 162\n",
      "23:01:25 [DEBUG] train episode 907: reward = -136.00, steps = 137\n",
      "23:01:25 [DEBUG] train episode 908: reward = -168.00, steps = 169\n",
      "23:01:25 [DEBUG] train episode 909: reward = -164.00, steps = 165\n",
      "23:01:25 [DEBUG] train episode 910: reward = -148.00, steps = 149\n",
      "23:01:25 [DEBUG] train episode 911: reward = -168.00, steps = 169\n",
      "23:01:25 [DEBUG] train episode 912: reward = -142.00, steps = 143\n",
      "23:01:26 [DEBUG] train episode 913: reward = -155.00, steps = 156\n",
      "23:01:26 [DEBUG] train episode 914: reward = -172.00, steps = 173\n",
      "23:01:26 [DEBUG] train episode 915: reward = -184.00, steps = 185\n",
      "23:01:26 [DEBUG] train episode 916: reward = -204.00, steps = 205\n",
      "23:01:26 [DEBUG] train episode 917: reward = -155.00, steps = 156\n",
      "23:01:27 [DEBUG] train episode 918: reward = -150.00, steps = 151\n",
      "23:01:27 [DEBUG] train episode 919: reward = -175.00, steps = 176\n",
      "23:01:27 [DEBUG] train episode 920: reward = -174.00, steps = 175\n",
      "23:01:27 [DEBUG] train episode 921: reward = -155.00, steps = 156\n",
      "23:01:27 [DEBUG] train episode 922: reward = -152.00, steps = 153\n",
      "23:01:27 [DEBUG] train episode 923: reward = -136.00, steps = 137\n",
      "23:01:27 [DEBUG] train episode 924: reward = -138.00, steps = 139\n",
      "23:01:28 [DEBUG] train episode 925: reward = -150.00, steps = 151\n",
      "23:01:28 [DEBUG] train episode 926: reward = -146.00, steps = 147\n",
      "23:01:28 [DEBUG] train episode 927: reward = -171.00, steps = 172\n",
      "23:01:28 [DEBUG] train episode 928: reward = -208.00, steps = 209\n",
      "23:01:28 [DEBUG] train episode 929: reward = -216.00, steps = 217\n",
      "23:01:29 [DEBUG] train episode 930: reward = -159.00, steps = 160\n",
      "23:01:29 [DEBUG] train episode 931: reward = -180.00, steps = 181\n",
      "23:01:29 [DEBUG] train episode 932: reward = -189.00, steps = 190\n",
      "23:01:29 [DEBUG] train episode 933: reward = -143.00, steps = 144\n",
      "23:01:29 [DEBUG] train episode 934: reward = -142.00, steps = 143\n",
      "23:01:29 [DEBUG] train episode 935: reward = -164.00, steps = 165\n",
      "23:01:30 [DEBUG] train episode 936: reward = -145.00, steps = 146\n",
      "23:01:30 [DEBUG] train episode 937: reward = -154.00, steps = 155\n",
      "23:01:30 [DEBUG] train episode 938: reward = -146.00, steps = 147\n",
      "23:01:30 [DEBUG] train episode 939: reward = -178.00, steps = 179\n",
      "23:01:30 [DEBUG] train episode 940: reward = -183.00, steps = 184\n",
      "23:01:30 [DEBUG] train episode 941: reward = -226.00, steps = 227\n",
      "23:01:31 [DEBUG] train episode 942: reward = -182.00, steps = 183\n",
      "23:01:31 [DEBUG] train episode 943: reward = -167.00, steps = 168\n",
      "23:01:31 [DEBUG] train episode 944: reward = -228.00, steps = 229\n",
      "23:01:31 [DEBUG] train episode 945: reward = -139.00, steps = 140\n",
      "23:01:31 [DEBUG] train episode 946: reward = -155.00, steps = 156\n",
      "23:01:31 [DEBUG] train episode 947: reward = -184.00, steps = 185\n",
      "23:01:32 [DEBUG] train episode 948: reward = -173.00, steps = 174\n",
      "23:01:32 [DEBUG] train episode 949: reward = -191.00, steps = 192\n",
      "23:01:32 [DEBUG] train episode 950: reward = -170.00, steps = 171\n",
      "23:01:32 [DEBUG] train episode 951: reward = -194.00, steps = 195\n",
      "23:01:32 [DEBUG] train episode 952: reward = -160.00, steps = 161\n",
      "23:01:33 [DEBUG] train episode 953: reward = -150.00, steps = 151\n",
      "23:01:33 [DEBUG] train episode 954: reward = -168.00, steps = 169\n",
      "23:01:33 [DEBUG] train episode 955: reward = -190.00, steps = 191\n",
      "23:01:33 [DEBUG] train episode 956: reward = -152.00, steps = 153\n",
      "23:01:33 [DEBUG] train episode 957: reward = -142.00, steps = 143\n",
      "23:01:33 [DEBUG] train episode 958: reward = -142.00, steps = 143\n",
      "23:01:34 [DEBUG] train episode 959: reward = -230.00, steps = 231\n",
      "23:01:34 [DEBUG] train episode 960: reward = -188.00, steps = 189\n",
      "23:01:34 [DEBUG] train episode 961: reward = -179.00, steps = 180\n",
      "23:01:34 [DEBUG] train episode 962: reward = -319.00, steps = 320\n",
      "23:01:35 [DEBUG] train episode 963: reward = -170.00, steps = 171\n",
      "23:01:35 [DEBUG] train episode 964: reward = -165.00, steps = 166\n",
      "23:01:35 [DEBUG] train episode 965: reward = -128.00, steps = 129\n",
      "23:01:35 [DEBUG] train episode 966: reward = -144.00, steps = 145\n",
      "23:01:35 [DEBUG] train episode 967: reward = -202.00, steps = 203\n",
      "23:01:35 [DEBUG] train episode 968: reward = -154.00, steps = 155\n",
      "23:01:36 [DEBUG] train episode 969: reward = -169.00, steps = 170\n",
      "23:01:36 [DEBUG] train episode 970: reward = -179.00, steps = 180\n",
      "23:01:36 [DEBUG] train episode 971: reward = -158.00, steps = 159\n",
      "23:01:36 [DEBUG] train episode 972: reward = -159.00, steps = 160\n",
      "23:01:36 [DEBUG] train episode 973: reward = -149.00, steps = 150\n",
      "23:01:36 [DEBUG] train episode 974: reward = -171.00, steps = 172\n",
      "23:01:37 [DEBUG] train episode 975: reward = -206.00, steps = 207\n",
      "23:01:37 [DEBUG] train episode 976: reward = -141.00, steps = 142\n",
      "23:01:37 [DEBUG] train episode 977: reward = -215.00, steps = 216\n",
      "23:01:37 [DEBUG] train episode 978: reward = -169.00, steps = 170\n",
      "23:01:37 [DEBUG] train episode 979: reward = -134.00, steps = 135\n",
      "23:01:38 [DEBUG] train episode 980: reward = -172.00, steps = 173\n",
      "23:01:38 [DEBUG] train episode 981: reward = -202.00, steps = 203\n",
      "23:01:38 [DEBUG] train episode 982: reward = -140.00, steps = 141\n",
      "23:01:38 [DEBUG] train episode 983: reward = -144.00, steps = 145\n",
      "23:01:38 [DEBUG] train episode 984: reward = -214.00, steps = 215\n",
      "23:01:39 [DEBUG] train episode 985: reward = -170.00, steps = 171\n",
      "23:01:39 [DEBUG] train episode 986: reward = -238.00, steps = 239\n",
      "23:01:39 [DEBUG] train episode 987: reward = -166.00, steps = 167\n",
      "23:01:39 [DEBUG] train episode 988: reward = -171.00, steps = 172\n",
      "23:01:39 [DEBUG] train episode 989: reward = -117.00, steps = 118\n",
      "23:01:39 [DEBUG] train episode 990: reward = -170.00, steps = 171\n",
      "23:01:40 [DEBUG] train episode 991: reward = -159.00, steps = 160\n",
      "23:01:40 [DEBUG] train episode 992: reward = -185.00, steps = 186\n",
      "23:01:40 [DEBUG] train episode 993: reward = -152.00, steps = 153\n",
      "23:01:40 [DEBUG] train episode 994: reward = -166.00, steps = 167\n",
      "23:01:40 [DEBUG] train episode 995: reward = -189.00, steps = 190\n",
      "23:01:41 [DEBUG] train episode 996: reward = -212.00, steps = 213\n",
      "23:01:41 [DEBUG] train episode 997: reward = -181.00, steps = 182\n",
      "23:01:41 [DEBUG] train episode 998: reward = -151.00, steps = 152\n",
      "23:01:41 [DEBUG] train episode 999: reward = -150.00, steps = 151\n",
      "23:01:41 [DEBUG] train episode 1000: reward = -158.00, steps = 159\n",
      "23:01:41 [DEBUG] train episode 1001: reward = -212.00, steps = 213\n",
      "23:01:42 [DEBUG] train episode 1002: reward = -155.00, steps = 156\n",
      "23:01:42 [DEBUG] train episode 1003: reward = -240.00, steps = 241\n",
      "23:01:42 [DEBUG] train episode 1004: reward = -143.00, steps = 144\n",
      "23:01:42 [DEBUG] train episode 1005: reward = -262.00, steps = 263\n",
      "23:01:42 [DEBUG] train episode 1006: reward = -181.00, steps = 182\n",
      "23:01:43 [DEBUG] train episode 1007: reward = -172.00, steps = 173\n",
      "23:01:43 [DEBUG] train episode 1008: reward = -319.00, steps = 320\n",
      "23:01:43 [DEBUG] train episode 1009: reward = -143.00, steps = 144\n",
      "23:01:43 [DEBUG] train episode 1010: reward = -167.00, steps = 168\n",
      "23:01:43 [DEBUG] train episode 1011: reward = -151.00, steps = 152\n",
      "23:01:44 [DEBUG] train episode 1012: reward = -220.00, steps = 221\n",
      "23:01:44 [DEBUG] train episode 1013: reward = -402.00, steps = 403\n",
      "23:01:44 [DEBUG] train episode 1014: reward = -278.00, steps = 279\n",
      "23:01:44 [DEBUG] train episode 1015: reward = -145.00, steps = 146\n",
      "23:01:45 [DEBUG] train episode 1016: reward = -166.00, steps = 167\n",
      "23:01:45 [DEBUG] train episode 1017: reward = -232.00, steps = 233\n",
      "23:01:45 [DEBUG] train episode 1018: reward = -158.00, steps = 159\n",
      "23:01:45 [DEBUG] train episode 1019: reward = -157.00, steps = 158\n",
      "23:01:45 [DEBUG] train episode 1020: reward = -169.00, steps = 170\n",
      "23:01:46 [DEBUG] train episode 1021: reward = -158.00, steps = 159\n",
      "23:01:46 [DEBUG] train episode 1022: reward = -196.00, steps = 197\n",
      "23:01:46 [DEBUG] train episode 1023: reward = -197.00, steps = 198\n",
      "23:01:46 [DEBUG] train episode 1024: reward = -172.00, steps = 173\n",
      "23:01:46 [DEBUG] train episode 1025: reward = -192.00, steps = 193\n",
      "23:01:47 [DEBUG] train episode 1026: reward = -188.00, steps = 189\n",
      "23:01:47 [DEBUG] train episode 1027: reward = -130.00, steps = 131\n",
      "23:01:47 [DEBUG] train episode 1028: reward = -212.00, steps = 213\n",
      "23:01:47 [DEBUG] train episode 1029: reward = -188.00, steps = 189\n",
      "23:01:47 [DEBUG] train episode 1030: reward = -134.00, steps = 135\n",
      "23:01:47 [DEBUG] train episode 1031: reward = -214.00, steps = 215\n",
      "23:01:48 [DEBUG] train episode 1032: reward = -164.00, steps = 165\n",
      "23:01:48 [DEBUG] train episode 1033: reward = -144.00, steps = 145\n",
      "23:01:48 [DEBUG] train episode 1034: reward = -250.00, steps = 251\n",
      "23:01:48 [DEBUG] train episode 1035: reward = -229.00, steps = 230\n",
      "23:01:48 [DEBUG] train episode 1036: reward = -163.00, steps = 164\n",
      "23:01:49 [DEBUG] train episode 1037: reward = -144.00, steps = 145\n",
      "23:01:49 [DEBUG] train episode 1038: reward = -150.00, steps = 151\n",
      "23:01:49 [DEBUG] train episode 1039: reward = -195.00, steps = 196\n",
      "23:01:49 [DEBUG] train episode 1040: reward = -182.00, steps = 183\n",
      "23:01:49 [DEBUG] train episode 1041: reward = -135.00, steps = 136\n",
      "23:01:50 [DEBUG] train episode 1042: reward = -190.00, steps = 191\n",
      "23:01:50 [DEBUG] train episode 1043: reward = -243.00, steps = 244\n",
      "23:01:50 [DEBUG] train episode 1044: reward = -199.00, steps = 200\n",
      "23:01:50 [DEBUG] train episode 1045: reward = -187.00, steps = 188\n",
      "23:01:50 [DEBUG] train episode 1046: reward = -203.00, steps = 204\n",
      "23:01:51 [DEBUG] train episode 1047: reward = -164.00, steps = 165\n",
      "23:01:51 [DEBUG] train episode 1048: reward = -162.00, steps = 163\n",
      "23:01:51 [DEBUG] train episode 1049: reward = -176.00, steps = 177\n",
      "23:01:51 [DEBUG] train episode 1050: reward = -167.00, steps = 168\n",
      "23:01:51 [DEBUG] train episode 1051: reward = -176.00, steps = 177\n",
      "23:01:51 [DEBUG] train episode 1052: reward = -197.00, steps = 198\n",
      "23:01:52 [DEBUG] train episode 1053: reward = -181.00, steps = 182\n",
      "23:01:52 [DEBUG] train episode 1054: reward = -149.00, steps = 150\n",
      "23:01:52 [DEBUG] train episode 1055: reward = -140.00, steps = 141\n",
      "23:01:52 [DEBUG] train episode 1056: reward = -151.00, steps = 152\n",
      "23:01:52 [DEBUG] train episode 1057: reward = -179.00, steps = 180\n",
      "23:01:52 [DEBUG] train episode 1058: reward = -170.00, steps = 171\n",
      "23:01:53 [DEBUG] train episode 1059: reward = -155.00, steps = 156\n",
      "23:01:53 [DEBUG] train episode 1060: reward = -156.00, steps = 157\n",
      "23:01:53 [DEBUG] train episode 1061: reward = -171.00, steps = 172\n",
      "23:01:53 [DEBUG] train episode 1062: reward = -158.00, steps = 159\n",
      "23:01:53 [DEBUG] train episode 1063: reward = -172.00, steps = 173\n",
      "23:01:54 [DEBUG] train episode 1064: reward = -218.00, steps = 219\n",
      "23:01:54 [DEBUG] train episode 1065: reward = -199.00, steps = 200\n",
      "23:01:54 [DEBUG] train episode 1066: reward = -149.00, steps = 150\n",
      "23:01:54 [DEBUG] train episode 1067: reward = -140.00, steps = 141\n",
      "23:01:54 [DEBUG] train episode 1068: reward = -146.00, steps = 147\n",
      "23:01:54 [DEBUG] train episode 1069: reward = -177.00, steps = 178\n",
      "23:01:55 [DEBUG] train episode 1070: reward = -198.00, steps = 199\n",
      "23:01:55 [DEBUG] train episode 1071: reward = -167.00, steps = 168\n",
      "23:01:55 [DEBUG] train episode 1072: reward = -149.00, steps = 150\n",
      "23:01:55 [DEBUG] train episode 1073: reward = -170.00, steps = 171\n",
      "23:01:55 [DEBUG] train episode 1074: reward = -139.00, steps = 140\n",
      "23:01:56 [DEBUG] train episode 1075: reward = -182.00, steps = 183\n",
      "23:01:56 [DEBUG] train episode 1076: reward = -193.00, steps = 194\n",
      "23:01:56 [DEBUG] train episode 1077: reward = -215.00, steps = 216\n",
      "23:01:56 [DEBUG] train episode 1078: reward = -203.00, steps = 204\n",
      "23:01:56 [DEBUG] train episode 1079: reward = -182.00, steps = 183\n",
      "23:01:57 [DEBUG] train episode 1080: reward = -209.00, steps = 210\n",
      "23:01:57 [DEBUG] train episode 1081: reward = -183.00, steps = 184\n",
      "23:01:57 [DEBUG] train episode 1082: reward = -177.00, steps = 178\n",
      "23:01:57 [DEBUG] train episode 1083: reward = -163.00, steps = 164\n",
      "23:01:57 [DEBUG] train episode 1084: reward = -183.00, steps = 184\n",
      "23:01:57 [DEBUG] train episode 1085: reward = -183.00, steps = 184\n",
      "23:01:58 [DEBUG] train episode 1086: reward = -207.00, steps = 208\n",
      "23:01:58 [DEBUG] train episode 1087: reward = -181.00, steps = 182\n",
      "23:01:58 [DEBUG] train episode 1088: reward = -186.00, steps = 187\n",
      "23:01:58 [DEBUG] train episode 1089: reward = -219.00, steps = 220\n",
      "23:01:58 [DEBUG] train episode 1090: reward = -148.00, steps = 149\n",
      "23:01:59 [DEBUG] train episode 1091: reward = -156.00, steps = 157\n",
      "23:01:59 [DEBUG] train episode 1092: reward = -174.00, steps = 175\n",
      "23:01:59 [DEBUG] train episode 1093: reward = -191.00, steps = 192\n",
      "23:01:59 [DEBUG] train episode 1094: reward = -208.00, steps = 209\n",
      "23:01:59 [DEBUG] train episode 1095: reward = -317.00, steps = 318\n",
      "23:02:00 [DEBUG] train episode 1096: reward = -224.00, steps = 225\n",
      "23:02:00 [DEBUG] train episode 1097: reward = -151.00, steps = 152\n",
      "23:02:00 [DEBUG] train episode 1098: reward = -208.00, steps = 209\n",
      "23:02:00 [DEBUG] train episode 1099: reward = -163.00, steps = 164\n",
      "23:02:00 [DEBUG] train episode 1100: reward = -161.00, steps = 162\n",
      "23:02:01 [DEBUG] train episode 1101: reward = -164.00, steps = 165\n",
      "23:02:01 [DEBUG] train episode 1102: reward = -188.00, steps = 189\n",
      "23:02:01 [DEBUG] train episode 1103: reward = -235.00, steps = 236\n",
      "23:02:01 [DEBUG] train episode 1104: reward = -211.00, steps = 212\n",
      "23:02:01 [DEBUG] train episode 1105: reward = -267.00, steps = 268\n",
      "23:02:02 [DEBUG] train episode 1106: reward = -166.00, steps = 167\n",
      "23:02:02 [DEBUG] train episode 1107: reward = -163.00, steps = 164\n",
      "23:02:02 [DEBUG] train episode 1108: reward = -150.00, steps = 151\n",
      "23:02:02 [DEBUG] train episode 1109: reward = -228.00, steps = 229\n",
      "23:02:02 [DEBUG] train episode 1110: reward = -222.00, steps = 223\n",
      "23:02:03 [DEBUG] train episode 1111: reward = -186.00, steps = 187\n",
      "23:02:03 [DEBUG] train episode 1112: reward = -237.00, steps = 238\n",
      "23:02:03 [DEBUG] train episode 1113: reward = -207.00, steps = 208\n",
      "23:02:03 [DEBUG] train episode 1114: reward = -183.00, steps = 184\n",
      "23:02:03 [DEBUG] train episode 1115: reward = -152.00, steps = 153\n",
      "23:02:04 [DEBUG] train episode 1116: reward = -128.00, steps = 129\n",
      "23:02:04 [DEBUG] train episode 1117: reward = -277.00, steps = 278\n",
      "23:02:04 [DEBUG] train episode 1118: reward = -211.00, steps = 212\n",
      "23:02:04 [DEBUG] train episode 1119: reward = -144.00, steps = 145\n",
      "23:02:04 [DEBUG] train episode 1120: reward = -156.00, steps = 157\n",
      "23:02:04 [DEBUG] train episode 1121: reward = -186.00, steps = 187\n",
      "23:02:05 [DEBUG] train episode 1122: reward = -162.00, steps = 163\n",
      "23:02:05 [DEBUG] train episode 1123: reward = -141.00, steps = 142\n",
      "23:02:05 [DEBUG] train episode 1124: reward = -185.00, steps = 186\n",
      "23:02:05 [DEBUG] train episode 1125: reward = -177.00, steps = 178\n",
      "23:02:05 [DEBUG] train episode 1126: reward = -186.00, steps = 187\n",
      "23:02:06 [DEBUG] train episode 1127: reward = -168.00, steps = 169\n",
      "23:02:06 [DEBUG] train episode 1128: reward = -164.00, steps = 165\n",
      "23:02:06 [DEBUG] train episode 1129: reward = -172.00, steps = 173\n",
      "23:02:06 [DEBUG] train episode 1130: reward = -155.00, steps = 156\n",
      "23:02:06 [DEBUG] train episode 1131: reward = -176.00, steps = 177\n",
      "23:02:06 [DEBUG] train episode 1132: reward = -152.00, steps = 153\n",
      "23:02:07 [DEBUG] train episode 1133: reward = -175.00, steps = 176\n",
      "23:02:07 [DEBUG] train episode 1134: reward = -173.00, steps = 174\n",
      "23:02:07 [DEBUG] train episode 1135: reward = -217.00, steps = 218\n",
      "23:02:07 [DEBUG] train episode 1136: reward = -146.00, steps = 147\n",
      "23:02:07 [DEBUG] train episode 1137: reward = -170.00, steps = 171\n",
      "23:02:08 [DEBUG] train episode 1138: reward = -179.00, steps = 180\n",
      "23:02:08 [DEBUG] train episode 1139: reward = -140.00, steps = 141\n",
      "23:02:08 [DEBUG] train episode 1140: reward = -151.00, steps = 152\n",
      "23:02:08 [DEBUG] train episode 1141: reward = -179.00, steps = 180\n",
      "23:02:08 [DEBUG] train episode 1142: reward = -142.00, steps = 143\n",
      "23:02:08 [DEBUG] train episode 1143: reward = -192.00, steps = 193\n",
      "23:02:09 [DEBUG] train episode 1144: reward = -247.00, steps = 248\n",
      "23:02:09 [DEBUG] train episode 1145: reward = -147.00, steps = 148\n",
      "23:02:09 [DEBUG] train episode 1146: reward = -125.00, steps = 126\n",
      "23:02:09 [DEBUG] train episode 1147: reward = -141.00, steps = 142\n",
      "23:02:09 [DEBUG] train episode 1148: reward = -129.00, steps = 130\n",
      "23:02:09 [DEBUG] train episode 1149: reward = -192.00, steps = 193\n",
      "23:02:10 [DEBUG] train episode 1150: reward = -251.00, steps = 252\n",
      "23:02:10 [DEBUG] train episode 1151: reward = -183.00, steps = 184\n",
      "23:02:10 [DEBUG] train episode 1152: reward = -257.00, steps = 258\n",
      "23:02:10 [DEBUG] train episode 1153: reward = -202.00, steps = 203\n",
      "23:02:10 [DEBUG] train episode 1154: reward = -142.00, steps = 143\n",
      "23:02:11 [DEBUG] train episode 1155: reward = -127.00, steps = 128\n",
      "23:02:11 [DEBUG] train episode 1156: reward = -150.00, steps = 151\n",
      "23:02:11 [DEBUG] train episode 1157: reward = -170.00, steps = 171\n",
      "23:02:11 [DEBUG] train episode 1158: reward = -179.00, steps = 180\n",
      "23:02:11 [DEBUG] train episode 1159: reward = -146.00, steps = 147\n",
      "23:02:11 [DEBUG] train episode 1160: reward = -117.00, steps = 118\n",
      "23:02:12 [DEBUG] train episode 1161: reward = -189.00, steps = 190\n",
      "23:02:12 [DEBUG] train episode 1162: reward = -358.00, steps = 359\n",
      "23:02:12 [DEBUG] train episode 1163: reward = -131.00, steps = 132\n",
      "23:02:12 [DEBUG] train episode 1164: reward = -171.00, steps = 172\n",
      "23:02:12 [DEBUG] train episode 1165: reward = -134.00, steps = 135\n",
      "23:02:13 [DEBUG] train episode 1166: reward = -189.00, steps = 190\n",
      "23:02:13 [DEBUG] train episode 1167: reward = -148.00, steps = 149\n",
      "23:02:13 [DEBUG] train episode 1168: reward = -178.00, steps = 179\n",
      "23:02:13 [DEBUG] train episode 1169: reward = -240.00, steps = 241\n",
      "23:02:13 [DEBUG] train episode 1170: reward = -118.00, steps = 119\n",
      "23:02:14 [DEBUG] train episode 1171: reward = -326.00, steps = 327\n",
      "23:02:14 [DEBUG] train episode 1172: reward = -153.00, steps = 154\n",
      "23:02:14 [DEBUG] train episode 1173: reward = -173.00, steps = 174\n",
      "23:02:14 [DEBUG] train episode 1174: reward = -137.00, steps = 138\n",
      "23:02:15 [DEBUG] train episode 1175: reward = -393.00, steps = 394\n",
      "23:02:15 [DEBUG] train episode 1176: reward = -168.00, steps = 169\n",
      "23:02:15 [DEBUG] train episode 1177: reward = -258.00, steps = 259\n",
      "23:02:15 [DEBUG] train episode 1178: reward = -195.00, steps = 196\n",
      "23:02:16 [DEBUG] train episode 1179: reward = -152.00, steps = 153\n",
      "23:02:16 [DEBUG] train episode 1180: reward = -232.00, steps = 233\n",
      "23:02:16 [DEBUG] train episode 1181: reward = -156.00, steps = 157\n",
      "23:02:16 [DEBUG] train episode 1182: reward = -161.00, steps = 162\n",
      "23:02:16 [DEBUG] train episode 1183: reward = -137.00, steps = 138\n",
      "23:02:16 [DEBUG] train episode 1184: reward = -168.00, steps = 169\n",
      "23:02:17 [DEBUG] train episode 1185: reward = -147.00, steps = 148\n",
      "23:02:17 [DEBUG] train episode 1186: reward = -232.00, steps = 233\n",
      "23:02:17 [DEBUG] train episode 1187: reward = -173.00, steps = 174\n",
      "23:02:17 [DEBUG] train episode 1188: reward = -150.00, steps = 151\n",
      "23:02:17 [DEBUG] train episode 1189: reward = -194.00, steps = 195\n",
      "23:02:17 [DEBUG] train episode 1190: reward = -147.00, steps = 148\n",
      "23:02:18 [DEBUG] train episode 1191: reward = -200.00, steps = 201\n",
      "23:02:18 [DEBUG] train episode 1192: reward = -202.00, steps = 203\n",
      "23:02:18 [DEBUG] train episode 1193: reward = -304.00, steps = 305\n",
      "23:02:18 [DEBUG] train episode 1194: reward = -116.00, steps = 117\n",
      "23:02:19 [DEBUG] train episode 1195: reward = -198.00, steps = 199\n",
      "23:02:19 [DEBUG] train episode 1196: reward = -152.00, steps = 153\n",
      "23:02:19 [DEBUG] train episode 1197: reward = -211.00, steps = 212\n",
      "23:02:19 [DEBUG] train episode 1198: reward = -118.00, steps = 119\n",
      "23:02:19 [DEBUG] train episode 1199: reward = -174.00, steps = 175\n",
      "23:02:19 [DEBUG] train episode 1200: reward = -157.00, steps = 158\n",
      "23:02:20 [DEBUG] train episode 1201: reward = -123.00, steps = 124\n",
      "23:02:20 [DEBUG] train episode 1202: reward = -145.00, steps = 146\n",
      "23:02:20 [DEBUG] train episode 1203: reward = -225.00, steps = 226\n",
      "23:02:20 [DEBUG] train episode 1204: reward = -152.00, steps = 153\n",
      "23:02:20 [DEBUG] train episode 1205: reward = -167.00, steps = 168\n",
      "23:02:20 [DEBUG] train episode 1206: reward = -149.00, steps = 150\n",
      "23:02:21 [DEBUG] train episode 1207: reward = -151.00, steps = 152\n",
      "23:02:21 [DEBUG] train episode 1208: reward = -150.00, steps = 151\n",
      "23:02:21 [DEBUG] train episode 1209: reward = -196.00, steps = 197\n",
      "23:02:21 [DEBUG] train episode 1210: reward = -248.00, steps = 249\n",
      "23:02:21 [DEBUG] train episode 1211: reward = -147.00, steps = 148\n",
      "23:02:21 [DEBUG] train episode 1212: reward = -277.00, steps = 278\n",
      "23:02:22 [DEBUG] train episode 1213: reward = -204.00, steps = 205\n",
      "23:02:22 [DEBUG] train episode 1214: reward = -143.00, steps = 144\n",
      "23:02:22 [DEBUG] train episode 1215: reward = -159.00, steps = 160\n",
      "23:02:22 [DEBUG] train episode 1216: reward = -216.00, steps = 217\n",
      "23:02:22 [DEBUG] train episode 1217: reward = -165.00, steps = 166\n",
      "23:02:23 [DEBUG] train episode 1218: reward = -147.00, steps = 148\n",
      "23:02:23 [DEBUG] train episode 1219: reward = -149.00, steps = 150\n",
      "23:02:23 [DEBUG] train episode 1220: reward = -126.00, steps = 127\n",
      "23:02:23 [DEBUG] train episode 1221: reward = -164.00, steps = 165\n",
      "23:02:23 [DEBUG] train episode 1222: reward = -127.00, steps = 128\n",
      "23:02:23 [DEBUG] train episode 1223: reward = -146.00, steps = 147\n",
      "23:02:23 [DEBUG] train episode 1224: reward = -169.00, steps = 170\n",
      "23:02:24 [DEBUG] train episode 1225: reward = -137.00, steps = 138\n",
      "23:02:24 [DEBUG] train episode 1226: reward = -145.00, steps = 146\n",
      "23:02:24 [DEBUG] train episode 1227: reward = -213.00, steps = 214\n",
      "23:02:24 [DEBUG] train episode 1228: reward = -215.00, steps = 216\n",
      "23:02:24 [DEBUG] train episode 1229: reward = -158.00, steps = 159\n",
      "23:02:25 [DEBUG] train episode 1230: reward = -158.00, steps = 159\n",
      "23:02:25 [DEBUG] train episode 1231: reward = -176.00, steps = 177\n",
      "23:02:25 [DEBUG] train episode 1232: reward = -146.00, steps = 147\n",
      "23:02:25 [DEBUG] train episode 1233: reward = -153.00, steps = 154\n",
      "23:02:25 [DEBUG] train episode 1234: reward = -266.00, steps = 267\n",
      "23:02:25 [DEBUG] train episode 1235: reward = -177.00, steps = 178\n",
      "23:02:26 [DEBUG] train episode 1236: reward = -152.00, steps = 153\n",
      "23:02:26 [DEBUG] train episode 1237: reward = -171.00, steps = 172\n",
      "23:02:26 [DEBUG] train episode 1238: reward = -159.00, steps = 160\n",
      "23:02:26 [DEBUG] train episode 1239: reward = -153.00, steps = 154\n",
      "23:02:26 [DEBUG] train episode 1240: reward = -168.00, steps = 169\n",
      "23:02:26 [DEBUG] train episode 1241: reward = -161.00, steps = 162\n",
      "23:02:27 [DEBUG] train episode 1242: reward = -161.00, steps = 162\n",
      "23:02:27 [DEBUG] train episode 1243: reward = -125.00, steps = 126\n",
      "23:02:27 [DEBUG] train episode 1244: reward = -157.00, steps = 158\n",
      "23:02:27 [DEBUG] train episode 1245: reward = -169.00, steps = 170\n",
      "23:02:27 [DEBUG] train episode 1246: reward = -126.00, steps = 127\n",
      "23:02:27 [DEBUG] train episode 1247: reward = -183.00, steps = 184\n",
      "23:02:28 [DEBUG] train episode 1248: reward = -171.00, steps = 172\n",
      "23:02:28 [DEBUG] train episode 1249: reward = -140.00, steps = 141\n",
      "23:02:28 [DEBUG] train episode 1250: reward = -166.00, steps = 167\n",
      "23:02:28 [DEBUG] train episode 1251: reward = -192.00, steps = 193\n",
      "23:02:28 [DEBUG] train episode 1252: reward = -277.00, steps = 278\n",
      "23:02:29 [DEBUG] train episode 1253: reward = -179.00, steps = 180\n",
      "23:02:29 [DEBUG] train episode 1254: reward = -123.00, steps = 124\n",
      "23:02:29 [DEBUG] train episode 1255: reward = -177.00, steps = 178\n",
      "23:02:29 [DEBUG] train episode 1256: reward = -189.00, steps = 190\n",
      "23:02:29 [DEBUG] train episode 1257: reward = -171.00, steps = 172\n",
      "23:02:29 [DEBUG] train episode 1258: reward = -164.00, steps = 165\n",
      "23:02:30 [DEBUG] train episode 1259: reward = -227.00, steps = 228\n",
      "23:02:30 [DEBUG] train episode 1260: reward = -134.00, steps = 135\n",
      "23:02:30 [DEBUG] train episode 1261: reward = -184.00, steps = 185\n",
      "23:02:30 [DEBUG] train episode 1262: reward = -150.00, steps = 151\n",
      "23:02:30 [DEBUG] train episode 1263: reward = -167.00, steps = 168\n",
      "23:02:31 [DEBUG] train episode 1264: reward = -164.00, steps = 165\n",
      "23:02:31 [DEBUG] train episode 1265: reward = -167.00, steps = 168\n",
      "23:02:31 [DEBUG] train episode 1266: reward = -195.00, steps = 196\n",
      "23:02:31 [DEBUG] train episode 1267: reward = -154.00, steps = 155\n",
      "23:02:31 [DEBUG] train episode 1268: reward = -166.00, steps = 167\n",
      "23:02:31 [DEBUG] train episode 1269: reward = -147.00, steps = 148\n",
      "23:02:32 [DEBUG] train episode 1270: reward = -214.00, steps = 215\n",
      "23:02:32 [DEBUG] train episode 1271: reward = -126.00, steps = 127\n",
      "23:02:32 [DEBUG] train episode 1272: reward = -153.00, steps = 154\n",
      "23:02:32 [DEBUG] train episode 1273: reward = -149.00, steps = 150\n",
      "23:02:32 [DEBUG] train episode 1274: reward = -142.00, steps = 143\n",
      "23:02:32 [DEBUG] train episode 1275: reward = -208.00, steps = 209\n",
      "23:02:33 [DEBUG] train episode 1276: reward = -231.00, steps = 232\n",
      "23:02:33 [DEBUG] train episode 1277: reward = -151.00, steps = 152\n",
      "23:02:33 [DEBUG] train episode 1278: reward = -155.00, steps = 156\n",
      "23:02:33 [DEBUG] train episode 1279: reward = -198.00, steps = 199\n",
      "23:02:33 [DEBUG] train episode 1280: reward = -125.00, steps = 126\n",
      "23:02:33 [DEBUG] train episode 1281: reward = -155.00, steps = 156\n",
      "23:02:34 [DEBUG] train episode 1282: reward = -152.00, steps = 153\n",
      "23:02:34 [DEBUG] train episode 1283: reward = -154.00, steps = 155\n",
      "23:02:34 [DEBUG] train episode 1284: reward = -137.00, steps = 138\n",
      "23:02:34 [DEBUG] train episode 1285: reward = -243.00, steps = 244\n",
      "23:02:34 [DEBUG] train episode 1286: reward = -112.00, steps = 113\n",
      "23:02:34 [DEBUG] train episode 1287: reward = -162.00, steps = 163\n",
      "23:02:35 [DEBUG] train episode 1288: reward = -149.00, steps = 150\n",
      "23:02:35 [DEBUG] train episode 1289: reward = -124.00, steps = 125\n",
      "23:02:35 [DEBUG] train episode 1290: reward = -172.00, steps = 173\n",
      "23:02:35 [DEBUG] train episode 1291: reward = -183.00, steps = 184\n",
      "23:02:35 [DEBUG] train episode 1292: reward = -185.00, steps = 186\n",
      "23:02:35 [DEBUG] train episode 1293: reward = -147.00, steps = 148\n",
      "23:02:35 [DEBUG] train episode 1294: reward = -162.00, steps = 163\n",
      "23:02:36 [DEBUG] train episode 1295: reward = -183.00, steps = 184\n",
      "23:02:36 [DEBUG] train episode 1296: reward = -177.00, steps = 178\n",
      "23:02:36 [DEBUG] train episode 1297: reward = -148.00, steps = 149\n",
      "23:02:36 [DEBUG] train episode 1298: reward = -127.00, steps = 128\n",
      "23:02:36 [DEBUG] train episode 1299: reward = -191.00, steps = 192\n",
      "23:02:37 [DEBUG] train episode 1300: reward = -224.00, steps = 225\n",
      "23:02:37 [DEBUG] train episode 1301: reward = -172.00, steps = 173\n",
      "23:02:37 [DEBUG] train episode 1302: reward = -154.00, steps = 155\n",
      "23:02:37 [DEBUG] train episode 1303: reward = -158.00, steps = 159\n",
      "23:02:37 [DEBUG] train episode 1304: reward = -157.00, steps = 158\n",
      "23:02:37 [DEBUG] train episode 1305: reward = -153.00, steps = 154\n",
      "23:02:38 [DEBUG] train episode 1306: reward = -143.00, steps = 144\n",
      "23:02:38 [DEBUG] train episode 1307: reward = -179.00, steps = 180\n",
      "23:02:38 [DEBUG] train episode 1308: reward = -143.00, steps = 144\n",
      "23:02:38 [DEBUG] train episode 1309: reward = -143.00, steps = 144\n",
      "23:02:38 [DEBUG] train episode 1310: reward = -149.00, steps = 150\n",
      "23:02:38 [DEBUG] train episode 1311: reward = -142.00, steps = 143\n",
      "23:02:38 [DEBUG] train episode 1312: reward = -165.00, steps = 166\n",
      "23:02:39 [DEBUG] train episode 1313: reward = -158.00, steps = 159\n",
      "23:02:39 [DEBUG] train episode 1314: reward = -194.00, steps = 195\n",
      "23:02:39 [DEBUG] train episode 1315: reward = -137.00, steps = 138\n",
      "23:02:39 [DEBUG] train episode 1316: reward = -146.00, steps = 147\n",
      "23:02:39 [DEBUG] train episode 1317: reward = -161.00, steps = 162\n",
      "23:02:39 [DEBUG] train episode 1318: reward = -149.00, steps = 150\n",
      "23:02:40 [DEBUG] train episode 1319: reward = -168.00, steps = 169\n",
      "23:02:40 [DEBUG] train episode 1320: reward = -268.00, steps = 269\n",
      "23:02:40 [DEBUG] train episode 1321: reward = -153.00, steps = 154\n",
      "23:02:40 [DEBUG] train episode 1322: reward = -147.00, steps = 148\n",
      "23:02:40 [DEBUG] train episode 1323: reward = -160.00, steps = 161\n",
      "23:02:40 [DEBUG] train episode 1324: reward = -143.00, steps = 144\n",
      "23:02:41 [DEBUG] train episode 1325: reward = -170.00, steps = 171\n",
      "23:02:41 [DEBUG] train episode 1326: reward = -161.00, steps = 162\n",
      "23:02:41 [DEBUG] train episode 1327: reward = -182.00, steps = 183\n",
      "23:02:41 [DEBUG] train episode 1328: reward = -212.00, steps = 213\n",
      "23:02:41 [DEBUG] train episode 1329: reward = -164.00, steps = 165\n",
      "23:02:41 [DEBUG] train episode 1330: reward = -142.00, steps = 143\n",
      "23:02:42 [DEBUG] train episode 1331: reward = -187.00, steps = 188\n",
      "23:02:42 [DEBUG] train episode 1332: reward = -139.00, steps = 140\n",
      "23:02:42 [DEBUG] train episode 1333: reward = -136.00, steps = 137\n",
      "23:02:42 [DEBUG] train episode 1334: reward = -116.00, steps = 117\n",
      "23:02:42 [DEBUG] train episode 1335: reward = -153.00, steps = 154\n",
      "23:02:42 [DEBUG] train episode 1336: reward = -149.00, steps = 150\n",
      "23:02:43 [DEBUG] train episode 1337: reward = -189.00, steps = 190\n",
      "23:02:43 [DEBUG] train episode 1338: reward = -185.00, steps = 186\n",
      "23:02:43 [DEBUG] train episode 1339: reward = -141.00, steps = 142\n",
      "23:02:43 [DEBUG] train episode 1340: reward = -124.00, steps = 125\n",
      "23:02:43 [DEBUG] train episode 1341: reward = -268.00, steps = 269\n",
      "23:02:44 [DEBUG] train episode 1342: reward = -136.00, steps = 137\n",
      "23:02:44 [DEBUG] train episode 1343: reward = -196.00, steps = 197\n",
      "23:02:44 [DEBUG] train episode 1344: reward = -217.00, steps = 218\n",
      "23:02:44 [DEBUG] train episode 1345: reward = -163.00, steps = 164\n",
      "23:02:44 [DEBUG] train episode 1346: reward = -136.00, steps = 137\n",
      "23:02:45 [DEBUG] train episode 1347: reward = -146.00, steps = 147\n",
      "23:02:45 [DEBUG] train episode 1348: reward = -136.00, steps = 137\n",
      "23:02:45 [DEBUG] train episode 1349: reward = -158.00, steps = 159\n",
      "23:02:45 [DEBUG] train episode 1350: reward = -216.00, steps = 217\n",
      "23:02:45 [DEBUG] train episode 1351: reward = -187.00, steps = 188\n",
      "23:02:45 [DEBUG] train episode 1352: reward = -126.00, steps = 127\n",
      "23:02:45 [DEBUG] train episode 1353: reward = -142.00, steps = 143\n",
      "23:02:46 [DEBUG] train episode 1354: reward = -117.00, steps = 118\n",
      "23:02:46 [DEBUG] train episode 1355: reward = -113.00, steps = 114\n",
      "23:02:46 [DEBUG] train episode 1356: reward = -162.00, steps = 163\n",
      "23:02:46 [DEBUG] train episode 1357: reward = -128.00, steps = 129\n",
      "23:02:46 [DEBUG] train episode 1358: reward = -174.00, steps = 175\n",
      "23:02:46 [DEBUG] train episode 1359: reward = -138.00, steps = 139\n",
      "23:02:47 [DEBUG] train episode 1360: reward = -160.00, steps = 161\n",
      "23:02:47 [DEBUG] train episode 1361: reward = -134.00, steps = 135\n",
      "23:02:47 [DEBUG] train episode 1362: reward = -207.00, steps = 208\n",
      "23:02:47 [DEBUG] train episode 1363: reward = -143.00, steps = 144\n",
      "23:02:47 [DEBUG] train episode 1364: reward = -166.00, steps = 167\n",
      "23:02:47 [DEBUG] train episode 1365: reward = -221.00, steps = 222\n",
      "23:02:48 [DEBUG] train episode 1366: reward = -180.00, steps = 181\n",
      "23:02:48 [DEBUG] train episode 1367: reward = -151.00, steps = 152\n",
      "23:02:48 [DEBUG] train episode 1368: reward = -168.00, steps = 169\n",
      "23:02:48 [DEBUG] train episode 1369: reward = -142.00, steps = 143\n",
      "23:02:48 [DEBUG] train episode 1370: reward = -142.00, steps = 143\n",
      "23:02:48 [DEBUG] train episode 1371: reward = -168.00, steps = 169\n",
      "23:02:49 [DEBUG] train episode 1372: reward = -180.00, steps = 181\n",
      "23:02:49 [DEBUG] train episode 1373: reward = -133.00, steps = 134\n",
      "23:02:49 [DEBUG] train episode 1374: reward = -207.00, steps = 208\n",
      "23:02:49 [DEBUG] train episode 1375: reward = -178.00, steps = 179\n",
      "23:02:49 [DEBUG] train episode 1376: reward = -151.00, steps = 152\n",
      "23:02:49 [DEBUG] train episode 1377: reward = -115.00, steps = 116\n",
      "23:02:50 [DEBUG] train episode 1378: reward = -159.00, steps = 160\n",
      "23:02:50 [DEBUG] train episode 1379: reward = -150.00, steps = 151\n",
      "23:02:50 [DEBUG] train episode 1380: reward = -110.00, steps = 111\n",
      "23:02:50 [DEBUG] train episode 1381: reward = -117.00, steps = 118\n",
      "23:02:50 [DEBUG] train episode 1382: reward = -135.00, steps = 136\n",
      "23:02:50 [DEBUG] train episode 1383: reward = -180.00, steps = 181\n",
      "23:02:50 [DEBUG] train episode 1384: reward = -136.00, steps = 137\n",
      "23:02:51 [DEBUG] train episode 1385: reward = -147.00, steps = 148\n",
      "23:02:51 [DEBUG] train episode 1386: reward = -108.00, steps = 109\n",
      "23:02:51 [DEBUG] train episode 1387: reward = -123.00, steps = 124\n",
      "23:02:51 [DEBUG] train episode 1388: reward = -153.00, steps = 154\n",
      "23:02:51 [DEBUG] train episode 1389: reward = -153.00, steps = 154\n",
      "23:02:51 [DEBUG] train episode 1390: reward = -132.00, steps = 133\n",
      "23:02:52 [DEBUG] train episode 1391: reward = -173.00, steps = 174\n",
      "23:02:52 [DEBUG] train episode 1392: reward = -141.00, steps = 142\n",
      "23:02:52 [DEBUG] train episode 1393: reward = -175.00, steps = 176\n",
      "23:02:52 [DEBUG] train episode 1394: reward = -228.00, steps = 229\n",
      "23:02:52 [DEBUG] train episode 1395: reward = -158.00, steps = 159\n",
      "23:02:52 [DEBUG] train episode 1396: reward = -150.00, steps = 151\n",
      "23:02:53 [DEBUG] train episode 1397: reward = -225.00, steps = 226\n",
      "23:02:53 [DEBUG] train episode 1398: reward = -180.00, steps = 181\n",
      "23:02:53 [DEBUG] train episode 1399: reward = -172.00, steps = 173\n",
      "23:02:53 [DEBUG] train episode 1400: reward = -176.00, steps = 177\n",
      "23:02:53 [DEBUG] train episode 1401: reward = -152.00, steps = 153\n",
      "23:02:54 [DEBUG] train episode 1402: reward = -127.00, steps = 128\n",
      "23:02:54 [DEBUG] train episode 1403: reward = -224.00, steps = 225\n",
      "23:02:54 [DEBUG] train episode 1404: reward = -131.00, steps = 132\n",
      "23:02:54 [DEBUG] train episode 1405: reward = -160.00, steps = 161\n",
      "23:02:54 [DEBUG] train episode 1406: reward = -178.00, steps = 179\n",
      "23:02:54 [DEBUG] train episode 1407: reward = -152.00, steps = 153\n",
      "23:02:55 [DEBUG] train episode 1408: reward = -121.00, steps = 122\n",
      "23:02:55 [DEBUG] train episode 1409: reward = -177.00, steps = 178\n",
      "23:02:55 [DEBUG] train episode 1410: reward = -155.00, steps = 156\n",
      "23:02:55 [DEBUG] train episode 1411: reward = -140.00, steps = 141\n",
      "23:02:55 [DEBUG] train episode 1412: reward = -184.00, steps = 185\n",
      "23:02:56 [DEBUG] train episode 1413: reward = -247.00, steps = 248\n",
      "23:02:56 [DEBUG] train episode 1414: reward = -170.00, steps = 171\n",
      "23:02:56 [DEBUG] train episode 1415: reward = -158.00, steps = 159\n",
      "23:02:56 [DEBUG] train episode 1416: reward = -145.00, steps = 146\n",
      "23:02:56 [DEBUG] train episode 1417: reward = -275.00, steps = 276\n",
      "23:02:56 [DEBUG] train episode 1418: reward = -216.00, steps = 217\n",
      "23:02:57 [DEBUG] train episode 1419: reward = -119.00, steps = 120\n",
      "23:02:57 [DEBUG] train episode 1420: reward = -166.00, steps = 167\n",
      "23:02:57 [DEBUG] train episode 1421: reward = -133.00, steps = 134\n",
      "23:02:57 [DEBUG] train episode 1422: reward = -164.00, steps = 165\n",
      "23:02:57 [DEBUG] train episode 1423: reward = -158.00, steps = 159\n",
      "23:02:57 [DEBUG] train episode 1424: reward = -162.00, steps = 163\n",
      "23:02:58 [DEBUG] train episode 1425: reward = -128.00, steps = 129\n",
      "23:02:58 [DEBUG] train episode 1426: reward = -161.00, steps = 162\n",
      "23:02:58 [DEBUG] train episode 1427: reward = -167.00, steps = 168\n",
      "23:02:58 [DEBUG] train episode 1428: reward = -166.00, steps = 167\n",
      "23:02:58 [DEBUG] train episode 1429: reward = -139.00, steps = 140\n",
      "23:02:59 [DEBUG] train episode 1430: reward = -130.00, steps = 131\n",
      "23:02:59 [DEBUG] train episode 1431: reward = -149.00, steps = 150\n",
      "23:02:59 [DEBUG] train episode 1432: reward = -144.00, steps = 145\n",
      "23:02:59 [DEBUG] train episode 1433: reward = -163.00, steps = 164\n",
      "23:02:59 [DEBUG] train episode 1434: reward = -130.00, steps = 131\n",
      "23:02:59 [DEBUG] train episode 1435: reward = -157.00, steps = 158\n",
      "23:03:00 [DEBUG] train episode 1436: reward = -166.00, steps = 167\n",
      "23:03:00 [DEBUG] train episode 1437: reward = -219.00, steps = 220\n",
      "23:03:00 [DEBUG] train episode 1438: reward = -157.00, steps = 158\n",
      "23:03:00 [DEBUG] train episode 1439: reward = -147.00, steps = 148\n",
      "23:03:00 [DEBUG] train episode 1440: reward = -145.00, steps = 146\n",
      "23:03:00 [DEBUG] train episode 1441: reward = -125.00, steps = 126\n",
      "23:03:01 [DEBUG] train episode 1442: reward = -145.00, steps = 146\n",
      "23:03:01 [DEBUG] train episode 1443: reward = -168.00, steps = 169\n",
      "23:03:01 [DEBUG] train episode 1444: reward = -134.00, steps = 135\n",
      "23:03:01 [DEBUG] train episode 1445: reward = -170.00, steps = 171\n",
      "23:03:01 [DEBUG] train episode 1446: reward = -133.00, steps = 134\n",
      "23:03:01 [DEBUG] train episode 1447: reward = -144.00, steps = 145\n",
      "23:03:02 [DEBUG] train episode 1448: reward = -164.00, steps = 165\n",
      "23:03:02 [DEBUG] train episode 1449: reward = -226.00, steps = 227\n",
      "23:03:02 [DEBUG] train episode 1450: reward = -144.00, steps = 145\n",
      "23:03:02 [DEBUG] train episode 1451: reward = -209.00, steps = 210\n",
      "23:03:02 [DEBUG] train episode 1452: reward = -139.00, steps = 140\n",
      "23:03:03 [DEBUG] train episode 1453: reward = -146.00, steps = 147\n",
      "23:03:03 [DEBUG] train episode 1454: reward = -207.00, steps = 208\n",
      "23:03:03 [DEBUG] train episode 1455: reward = -176.00, steps = 177\n",
      "23:03:03 [DEBUG] train episode 1456: reward = -129.00, steps = 130\n",
      "23:03:03 [DEBUG] train episode 1457: reward = -120.00, steps = 121\n",
      "23:03:03 [DEBUG] train episode 1458: reward = -159.00, steps = 160\n",
      "23:03:03 [DEBUG] train episode 1459: reward = -137.00, steps = 138\n",
      "23:03:04 [DEBUG] train episode 1460: reward = -166.00, steps = 167\n",
      "23:03:04 [DEBUG] train episode 1461: reward = -148.00, steps = 149\n",
      "23:03:04 [DEBUG] train episode 1462: reward = -138.00, steps = 139\n",
      "23:03:04 [DEBUG] train episode 1463: reward = -231.00, steps = 232\n",
      "23:03:04 [DEBUG] train episode 1464: reward = -154.00, steps = 155\n",
      "23:03:05 [DEBUG] train episode 1465: reward = -155.00, steps = 156\n",
      "23:03:05 [DEBUG] train episode 1466: reward = -158.00, steps = 159\n",
      "23:03:05 [DEBUG] train episode 1467: reward = -153.00, steps = 154\n",
      "23:03:05 [DEBUG] train episode 1468: reward = -128.00, steps = 129\n",
      "23:03:05 [DEBUG] train episode 1469: reward = -150.00, steps = 151\n",
      "23:03:05 [DEBUG] train episode 1470: reward = -140.00, steps = 141\n",
      "23:03:05 [DEBUG] train episode 1471: reward = -152.00, steps = 153\n",
      "23:03:06 [DEBUG] train episode 1472: reward = -158.00, steps = 159\n",
      "23:03:06 [DEBUG] train episode 1473: reward = -233.00, steps = 234\n",
      "23:03:06 [DEBUG] train episode 1474: reward = -246.00, steps = 247\n",
      "23:03:06 [DEBUG] train episode 1475: reward = -149.00, steps = 150\n",
      "23:03:06 [DEBUG] train episode 1476: reward = -136.00, steps = 137\n",
      "23:03:06 [DEBUG] train episode 1477: reward = -142.00, steps = 143\n",
      "23:03:07 [DEBUG] train episode 1478: reward = -176.00, steps = 177\n",
      "23:03:07 [DEBUG] train episode 1479: reward = -134.00, steps = 135\n",
      "23:03:07 [DEBUG] train episode 1480: reward = -223.00, steps = 224\n",
      "23:03:07 [DEBUG] train episode 1481: reward = -189.00, steps = 190\n",
      "23:03:07 [DEBUG] train episode 1482: reward = -157.00, steps = 158\n",
      "23:03:08 [DEBUG] train episode 1483: reward = -140.00, steps = 141\n",
      "23:03:08 [DEBUG] train episode 1484: reward = -237.00, steps = 238\n",
      "23:03:08 [DEBUG] train episode 1485: reward = -149.00, steps = 150\n",
      "23:03:08 [DEBUG] train episode 1486: reward = -162.00, steps = 163\n",
      "23:03:08 [DEBUG] train episode 1487: reward = -179.00, steps = 180\n",
      "23:03:08 [DEBUG] train episode 1488: reward = -142.00, steps = 143\n",
      "23:03:09 [DEBUG] train episode 1489: reward = -154.00, steps = 155\n",
      "23:03:09 [DEBUG] train episode 1490: reward = -135.00, steps = 136\n",
      "23:03:09 [DEBUG] train episode 1491: reward = -165.00, steps = 166\n",
      "23:03:09 [DEBUG] train episode 1492: reward = -135.00, steps = 136\n",
      "23:03:09 [DEBUG] train episode 1493: reward = -152.00, steps = 153\n",
      "23:03:09 [DEBUG] train episode 1494: reward = -149.00, steps = 150\n",
      "23:03:10 [DEBUG] train episode 1495: reward = -162.00, steps = 163\n",
      "23:03:10 [DEBUG] train episode 1496: reward = -152.00, steps = 153\n",
      "23:03:10 [DEBUG] train episode 1497: reward = -160.00, steps = 161\n",
      "23:03:10 [DEBUG] train episode 1498: reward = -143.00, steps = 144\n",
      "23:03:10 [DEBUG] train episode 1499: reward = -171.00, steps = 172\n",
      "23:03:10 [DEBUG] train episode 1500: reward = -141.00, steps = 142\n",
      "23:03:11 [DEBUG] train episode 1501: reward = -174.00, steps = 175\n",
      "23:03:11 [DEBUG] train episode 1502: reward = -168.00, steps = 169\n",
      "23:03:11 [DEBUG] train episode 1503: reward = -149.00, steps = 150\n",
      "23:03:11 [DEBUG] train episode 1504: reward = -141.00, steps = 142\n",
      "23:03:11 [DEBUG] train episode 1505: reward = -108.00, steps = 109\n",
      "23:03:11 [DEBUG] train episode 1506: reward = -119.00, steps = 120\n",
      "23:03:11 [DEBUG] train episode 1507: reward = -141.00, steps = 142\n",
      "23:03:12 [DEBUG] train episode 1508: reward = -158.00, steps = 159\n",
      "23:03:12 [DEBUG] train episode 1509: reward = -138.00, steps = 139\n",
      "23:03:12 [DEBUG] train episode 1510: reward = -148.00, steps = 149\n",
      "23:03:12 [DEBUG] train episode 1511: reward = -138.00, steps = 139\n",
      "23:03:12 [DEBUG] train episode 1512: reward = -142.00, steps = 143\n",
      "23:03:12 [DEBUG] train episode 1513: reward = -154.00, steps = 155\n",
      "23:03:12 [DEBUG] train episode 1514: reward = -143.00, steps = 144\n",
      "23:03:13 [DEBUG] train episode 1515: reward = -145.00, steps = 146\n",
      "23:03:13 [DEBUG] train episode 1516: reward = -111.00, steps = 112\n",
      "23:03:13 [DEBUG] train episode 1517: reward = -144.00, steps = 145\n",
      "23:03:13 [DEBUG] train episode 1518: reward = -148.00, steps = 149\n",
      "23:03:13 [DEBUG] train episode 1519: reward = -119.00, steps = 120\n",
      "23:03:13 [DEBUG] train episode 1520: reward = -133.00, steps = 134\n",
      "23:03:14 [DEBUG] train episode 1521: reward = -179.00, steps = 180\n",
      "23:03:14 [DEBUG] train episode 1522: reward = -147.00, steps = 148\n",
      "23:03:14 [DEBUG] train episode 1523: reward = -197.00, steps = 198\n",
      "23:03:14 [DEBUG] train episode 1524: reward = -146.00, steps = 147\n",
      "23:03:14 [DEBUG] train episode 1525: reward = -143.00, steps = 144\n",
      "23:03:14 [DEBUG] train episode 1526: reward = -169.00, steps = 170\n",
      "23:03:15 [DEBUG] train episode 1527: reward = -155.00, steps = 156\n",
      "23:03:15 [DEBUG] train episode 1528: reward = -157.00, steps = 158\n",
      "23:03:15 [DEBUG] train episode 1529: reward = -153.00, steps = 154\n",
      "23:03:15 [DEBUG] train episode 1530: reward = -184.00, steps = 185\n",
      "23:03:15 [DEBUG] train episode 1531: reward = -121.00, steps = 122\n",
      "23:03:15 [DEBUG] train episode 1532: reward = -174.00, steps = 175\n",
      "23:03:16 [DEBUG] train episode 1533: reward = -148.00, steps = 149\n",
      "23:03:16 [DEBUG] train episode 1534: reward = -185.00, steps = 186\n",
      "23:03:16 [DEBUG] train episode 1535: reward = -150.00, steps = 151\n",
      "23:03:16 [DEBUG] train episode 1536: reward = -142.00, steps = 143\n",
      "23:03:16 [DEBUG] train episode 1537: reward = -153.00, steps = 154\n",
      "23:03:16 [DEBUG] train episode 1538: reward = -119.00, steps = 120\n",
      "23:03:17 [DEBUG] train episode 1539: reward = -145.00, steps = 146\n",
      "23:03:17 [DEBUG] train episode 1540: reward = -163.00, steps = 164\n",
      "23:03:17 [DEBUG] train episode 1541: reward = -166.00, steps = 167\n",
      "23:03:17 [DEBUG] train episode 1542: reward = -146.00, steps = 147\n",
      "23:03:17 [DEBUG] train episode 1543: reward = -140.00, steps = 141\n",
      "23:03:17 [DEBUG] train episode 1544: reward = -170.00, steps = 171\n",
      "23:03:18 [DEBUG] train episode 1545: reward = -115.00, steps = 116\n",
      "23:03:18 [DEBUG] train episode 1546: reward = -148.00, steps = 149\n",
      "23:03:18 [DEBUG] train episode 1547: reward = -198.00, steps = 199\n",
      "23:03:18 [DEBUG] train episode 1548: reward = -162.00, steps = 163\n",
      "23:03:18 [DEBUG] train episode 1549: reward = -185.00, steps = 186\n",
      "23:03:18 [DEBUG] train episode 1550: reward = -172.00, steps = 173\n",
      "23:03:19 [DEBUG] train episode 1551: reward = -159.00, steps = 160\n",
      "23:03:19 [DEBUG] train episode 1552: reward = -197.00, steps = 198\n",
      "23:03:19 [DEBUG] train episode 1553: reward = -126.00, steps = 127\n",
      "23:03:19 [DEBUG] train episode 1554: reward = -126.00, steps = 127\n",
      "23:03:19 [DEBUG] train episode 1555: reward = -312.00, steps = 313\n",
      "23:03:19 [DEBUG] train episode 1556: reward = -148.00, steps = 149\n",
      "23:03:20 [DEBUG] train episode 1557: reward = -133.00, steps = 134\n",
      "23:03:20 [DEBUG] train episode 1558: reward = -169.00, steps = 170\n",
      "23:03:20 [DEBUG] train episode 1559: reward = -134.00, steps = 135\n",
      "23:03:20 [DEBUG] train episode 1560: reward = -171.00, steps = 172\n",
      "23:03:20 [DEBUG] train episode 1561: reward = -162.00, steps = 163\n",
      "23:03:21 [DEBUG] train episode 1562: reward = -161.00, steps = 162\n",
      "23:03:21 [DEBUG] train episode 1563: reward = -222.00, steps = 223\n",
      "23:03:21 [DEBUG] train episode 1564: reward = -153.00, steps = 154\n",
      "23:03:21 [DEBUG] train episode 1565: reward = -147.00, steps = 148\n",
      "23:03:21 [DEBUG] train episode 1566: reward = -137.00, steps = 138\n",
      "23:03:21 [DEBUG] train episode 1567: reward = -128.00, steps = 129\n",
      "23:03:22 [DEBUG] train episode 1568: reward = -127.00, steps = 128\n",
      "23:03:22 [DEBUG] train episode 1569: reward = -134.00, steps = 135\n",
      "23:03:22 [DEBUG] train episode 1570: reward = -197.00, steps = 198\n",
      "23:03:22 [DEBUG] train episode 1571: reward = -174.00, steps = 175\n",
      "23:03:22 [DEBUG] train episode 1572: reward = -169.00, steps = 170\n",
      "23:03:22 [DEBUG] train episode 1573: reward = -166.00, steps = 167\n",
      "23:03:23 [DEBUG] train episode 1574: reward = -149.00, steps = 150\n",
      "23:03:23 [DEBUG] train episode 1575: reward = -120.00, steps = 121\n",
      "23:03:23 [DEBUG] train episode 1576: reward = -180.00, steps = 181\n",
      "23:03:23 [DEBUG] train episode 1577: reward = -202.00, steps = 203\n",
      "23:03:23 [DEBUG] train episode 1578: reward = -126.00, steps = 127\n",
      "23:03:23 [DEBUG] train episode 1579: reward = -161.00, steps = 162\n",
      "23:03:24 [DEBUG] train episode 1580: reward = -131.00, steps = 132\n",
      "23:03:24 [DEBUG] train episode 1581: reward = -133.00, steps = 134\n",
      "23:03:24 [DEBUG] train episode 1582: reward = -208.00, steps = 209\n",
      "23:03:24 [DEBUG] train episode 1583: reward = -141.00, steps = 142\n",
      "23:03:24 [DEBUG] train episode 1584: reward = -141.00, steps = 142\n",
      "23:03:24 [DEBUG] train episode 1585: reward = -110.00, steps = 111\n",
      "23:03:24 [DEBUG] train episode 1586: reward = -152.00, steps = 153\n",
      "23:03:25 [DEBUG] train episode 1587: reward = -111.00, steps = 112\n",
      "23:03:25 [DEBUG] train episode 1588: reward = -174.00, steps = 175\n",
      "23:03:25 [DEBUG] train episode 1589: reward = -140.00, steps = 141\n",
      "23:03:25 [DEBUG] train episode 1590: reward = -123.00, steps = 124\n",
      "23:03:25 [DEBUG] train episode 1591: reward = -141.00, steps = 142\n",
      "23:03:25 [DEBUG] train episode 1592: reward = -112.00, steps = 113\n",
      "23:03:25 [DEBUG] train episode 1593: reward = -140.00, steps = 141\n",
      "23:03:26 [DEBUG] train episode 1594: reward = -143.00, steps = 144\n",
      "23:03:26 [DEBUG] train episode 1595: reward = -154.00, steps = 155\n",
      "23:03:26 [DEBUG] train episode 1596: reward = -168.00, steps = 169\n",
      "23:03:26 [DEBUG] train episode 1597: reward = -160.00, steps = 161\n",
      "23:03:26 [DEBUG] train episode 1598: reward = -154.00, steps = 155\n",
      "23:03:26 [DEBUG] train episode 1599: reward = -131.00, steps = 132\n",
      "23:03:27 [DEBUG] train episode 1600: reward = -167.00, steps = 168\n",
      "23:03:27 [DEBUG] train episode 1601: reward = -145.00, steps = 146\n",
      "23:03:27 [DEBUG] train episode 1602: reward = -160.00, steps = 161\n",
      "23:03:27 [DEBUG] train episode 1603: reward = -124.00, steps = 125\n",
      "23:03:27 [DEBUG] train episode 1604: reward = -122.00, steps = 123\n",
      "23:03:27 [DEBUG] train episode 1605: reward = -165.00, steps = 166\n",
      "23:03:28 [DEBUG] train episode 1606: reward = -173.00, steps = 174\n",
      "23:03:28 [DEBUG] train episode 1607: reward = -155.00, steps = 156\n",
      "23:03:28 [DEBUG] train episode 1608: reward = -171.00, steps = 172\n",
      "23:03:28 [DEBUG] train episode 1609: reward = -134.00, steps = 135\n",
      "23:03:28 [DEBUG] train episode 1610: reward = -174.00, steps = 175\n",
      "23:03:28 [DEBUG] train episode 1611: reward = -170.00, steps = 171\n",
      "23:03:29 [DEBUG] train episode 1612: reward = -180.00, steps = 181\n",
      "23:03:29 [DEBUG] train episode 1613: reward = -177.00, steps = 178\n",
      "23:03:29 [DEBUG] train episode 1614: reward = -136.00, steps = 137\n",
      "23:03:29 [DEBUG] train episode 1615: reward = -125.00, steps = 126\n",
      "23:03:29 [DEBUG] train episode 1616: reward = -112.00, steps = 113\n",
      "23:03:29 [DEBUG] train episode 1617: reward = -128.00, steps = 129\n",
      "23:03:29 [DEBUG] train episode 1618: reward = -112.00, steps = 113\n",
      "23:03:30 [DEBUG] train episode 1619: reward = -123.00, steps = 124\n",
      "23:03:30 [DEBUG] train episode 1620: reward = -133.00, steps = 134\n",
      "23:03:30 [DEBUG] train episode 1621: reward = -242.00, steps = 243\n",
      "23:03:30 [DEBUG] train episode 1622: reward = -184.00, steps = 185\n",
      "23:03:30 [DEBUG] train episode 1623: reward = -170.00, steps = 171\n",
      "23:03:30 [DEBUG] train episode 1624: reward = -161.00, steps = 162\n",
      "23:03:31 [DEBUG] train episode 1625: reward = -132.00, steps = 133\n",
      "23:03:31 [DEBUG] train episode 1626: reward = -127.00, steps = 128\n",
      "23:03:31 [DEBUG] train episode 1627: reward = -137.00, steps = 138\n",
      "23:03:31 [DEBUG] train episode 1628: reward = -140.00, steps = 141\n",
      "23:03:31 [DEBUG] train episode 1629: reward = -115.00, steps = 116\n",
      "23:03:31 [DEBUG] train episode 1630: reward = -134.00, steps = 135\n",
      "23:03:31 [DEBUG] train episode 1631: reward = -118.00, steps = 119\n",
      "23:03:31 [DEBUG] train episode 1632: reward = -135.00, steps = 136\n",
      "23:03:32 [DEBUG] train episode 1633: reward = -140.00, steps = 141\n",
      "23:03:32 [DEBUG] train episode 1634: reward = -132.00, steps = 133\n",
      "23:03:32 [DEBUG] train episode 1635: reward = -136.00, steps = 137\n",
      "23:03:32 [DEBUG] train episode 1636: reward = -138.00, steps = 139\n",
      "23:03:32 [DEBUG] train episode 1637: reward = -142.00, steps = 143\n",
      "23:03:32 [DEBUG] train episode 1638: reward = -127.00, steps = 128\n",
      "23:03:32 [DEBUG] train episode 1639: reward = -121.00, steps = 122\n",
      "23:03:33 [DEBUG] train episode 1640: reward = -112.00, steps = 113\n",
      "23:03:33 [DEBUG] train episode 1641: reward = -152.00, steps = 153\n",
      "23:03:33 [DEBUG] train episode 1642: reward = -192.00, steps = 193\n",
      "23:03:33 [DEBUG] train episode 1643: reward = -162.00, steps = 163\n",
      "23:03:33 [DEBUG] train episode 1644: reward = -141.00, steps = 142\n",
      "23:03:33 [DEBUG] train episode 1645: reward = -161.00, steps = 162\n",
      "23:03:34 [DEBUG] train episode 1646: reward = -226.00, steps = 227\n",
      "23:03:34 [DEBUG] train episode 1647: reward = -141.00, steps = 142\n",
      "23:03:34 [DEBUG] train episode 1648: reward = -143.00, steps = 144\n",
      "23:03:34 [DEBUG] train episode 1649: reward = -117.00, steps = 118\n",
      "23:03:34 [DEBUG] train episode 1650: reward = -141.00, steps = 142\n",
      "23:03:34 [DEBUG] train episode 1651: reward = -112.00, steps = 113\n",
      "23:03:35 [DEBUG] train episode 1652: reward = -122.00, steps = 123\n",
      "23:03:35 [DEBUG] train episode 1653: reward = -152.00, steps = 153\n",
      "23:03:35 [DEBUG] train episode 1654: reward = -139.00, steps = 140\n",
      "23:03:35 [DEBUG] train episode 1655: reward = -259.00, steps = 260\n",
      "23:03:35 [DEBUG] train episode 1656: reward = -128.00, steps = 129\n",
      "23:03:35 [DEBUG] train episode 1657: reward = -149.00, steps = 150\n",
      "23:03:36 [DEBUG] train episode 1658: reward = -114.00, steps = 115\n",
      "23:03:36 [DEBUG] train episode 1659: reward = -138.00, steps = 139\n",
      "23:03:36 [DEBUG] train episode 1660: reward = -153.00, steps = 154\n",
      "23:03:36 [DEBUG] train episode 1661: reward = -127.00, steps = 128\n",
      "23:03:36 [DEBUG] train episode 1662: reward = -117.00, steps = 118\n",
      "23:03:36 [DEBUG] train episode 1663: reward = -133.00, steps = 134\n",
      "23:03:36 [DEBUG] train episode 1664: reward = -137.00, steps = 138\n",
      "23:03:37 [DEBUG] train episode 1665: reward = -155.00, steps = 156\n",
      "23:03:37 [DEBUG] train episode 1666: reward = -132.00, steps = 133\n",
      "23:03:37 [DEBUG] train episode 1667: reward = -146.00, steps = 147\n",
      "23:03:37 [DEBUG] train episode 1668: reward = -139.00, steps = 140\n",
      "23:03:37 [DEBUG] train episode 1669: reward = -219.00, steps = 220\n",
      "23:03:37 [DEBUG] train episode 1670: reward = -193.00, steps = 194\n",
      "23:03:37 [DEBUG] train episode 1671: reward = -143.00, steps = 144\n",
      "23:03:38 [DEBUG] train episode 1672: reward = -153.00, steps = 154\n",
      "23:03:38 [DEBUG] train episode 1673: reward = -130.00, steps = 131\n",
      "23:03:38 [DEBUG] train episode 1674: reward = -149.00, steps = 150\n",
      "23:03:38 [DEBUG] train episode 1675: reward = -189.00, steps = 190\n",
      "23:03:38 [DEBUG] train episode 1676: reward = -161.00, steps = 162\n",
      "23:03:38 [DEBUG] train episode 1677: reward = -145.00, steps = 146\n",
      "23:03:39 [DEBUG] train episode 1678: reward = -123.00, steps = 124\n",
      "23:03:39 [DEBUG] train episode 1679: reward = -168.00, steps = 169\n",
      "23:03:39 [DEBUG] train episode 1680: reward = -140.00, steps = 141\n",
      "23:03:39 [DEBUG] train episode 1681: reward = -137.00, steps = 138\n",
      "23:03:39 [DEBUG] train episode 1682: reward = -117.00, steps = 118\n",
      "23:03:39 [DEBUG] train episode 1683: reward = -167.00, steps = 168\n",
      "23:03:39 [DEBUG] train episode 1684: reward = -129.00, steps = 130\n",
      "23:03:40 [DEBUG] train episode 1685: reward = -168.00, steps = 169\n",
      "23:03:40 [DEBUG] train episode 1686: reward = -129.00, steps = 130\n",
      "23:03:40 [DEBUG] train episode 1687: reward = -151.00, steps = 152\n",
      "23:03:40 [DEBUG] train episode 1688: reward = -147.00, steps = 148\n",
      "23:03:40 [DEBUG] train episode 1689: reward = -104.00, steps = 105\n",
      "23:03:40 [DEBUG] train episode 1690: reward = -138.00, steps = 139\n",
      "23:03:40 [DEBUG] train episode 1691: reward = -193.00, steps = 194\n",
      "23:03:41 [DEBUG] train episode 1692: reward = -122.00, steps = 123\n",
      "23:03:41 [DEBUG] train episode 1693: reward = -155.00, steps = 156\n",
      "23:03:41 [DEBUG] train episode 1694: reward = -135.00, steps = 136\n",
      "23:03:41 [DEBUG] train episode 1695: reward = -154.00, steps = 155\n",
      "23:03:41 [DEBUG] train episode 1696: reward = -165.00, steps = 166\n",
      "23:03:41 [DEBUG] train episode 1697: reward = -128.00, steps = 129\n",
      "23:03:42 [DEBUG] train episode 1698: reward = -157.00, steps = 158\n",
      "23:03:42 [DEBUG] train episode 1699: reward = -155.00, steps = 156\n",
      "23:03:42 [DEBUG] train episode 1700: reward = -169.00, steps = 170\n",
      "23:03:42 [DEBUG] train episode 1701: reward = -216.00, steps = 217\n",
      "23:03:42 [DEBUG] train episode 1702: reward = -135.00, steps = 136\n",
      "23:03:42 [DEBUG] train episode 1703: reward = -130.00, steps = 131\n",
      "23:03:43 [DEBUG] train episode 1704: reward = -124.00, steps = 125\n",
      "23:03:43 [DEBUG] train episode 1705: reward = -128.00, steps = 129\n",
      "23:03:43 [DEBUG] train episode 1706: reward = -129.00, steps = 130\n",
      "23:03:43 [DEBUG] train episode 1707: reward = -148.00, steps = 149\n",
      "23:03:43 [DEBUG] train episode 1708: reward = -168.00, steps = 169\n",
      "23:03:43 [DEBUG] train episode 1709: reward = -158.00, steps = 159\n",
      "23:03:43 [DEBUG] train episode 1710: reward = -116.00, steps = 117\n",
      "23:03:44 [DEBUG] train episode 1711: reward = -140.00, steps = 141\n",
      "23:03:44 [DEBUG] train episode 1712: reward = -134.00, steps = 135\n",
      "23:03:44 [DEBUG] train episode 1713: reward = -118.00, steps = 119\n",
      "23:03:44 [DEBUG] train episode 1714: reward = -133.00, steps = 134\n",
      "23:03:44 [DEBUG] train episode 1715: reward = -182.00, steps = 183\n",
      "23:03:44 [DEBUG] train episode 1716: reward = -183.00, steps = 184\n",
      "23:03:45 [DEBUG] train episode 1717: reward = -185.00, steps = 186\n",
      "23:03:45 [DEBUG] train episode 1718: reward = -158.00, steps = 159\n",
      "23:03:45 [DEBUG] train episode 1719: reward = -136.00, steps = 137\n",
      "23:03:45 [DEBUG] train episode 1720: reward = -165.00, steps = 166\n",
      "23:03:45 [DEBUG] train episode 1721: reward = -133.00, steps = 134\n",
      "23:03:45 [DEBUG] train episode 1722: reward = -118.00, steps = 119\n",
      "23:03:46 [DEBUG] train episode 1723: reward = -136.00, steps = 137\n",
      "23:03:46 [DEBUG] train episode 1724: reward = -119.00, steps = 120\n",
      "23:03:46 [DEBUG] train episode 1725: reward = -121.00, steps = 122\n",
      "23:03:46 [DEBUG] train episode 1726: reward = -164.00, steps = 165\n",
      "23:03:46 [DEBUG] train episode 1727: reward = -129.00, steps = 130\n",
      "23:03:46 [DEBUG] train episode 1728: reward = -148.00, steps = 149\n",
      "23:03:47 [DEBUG] train episode 1729: reward = -165.00, steps = 166\n",
      "23:03:47 [DEBUG] train episode 1730: reward = -181.00, steps = 182\n",
      "23:03:47 [DEBUG] train episode 1731: reward = -171.00, steps = 172\n",
      "23:03:47 [DEBUG] train episode 1732: reward = -131.00, steps = 132\n",
      "23:03:47 [DEBUG] train episode 1733: reward = -151.00, steps = 152\n",
      "23:03:47 [DEBUG] train episode 1734: reward = -273.00, steps = 274\n",
      "23:03:48 [DEBUG] train episode 1735: reward = -120.00, steps = 121\n",
      "23:03:48 [DEBUG] train episode 1736: reward = -159.00, steps = 160\n",
      "23:03:48 [DEBUG] train episode 1737: reward = -140.00, steps = 141\n",
      "23:03:48 [DEBUG] train episode 1738: reward = -148.00, steps = 149\n",
      "23:03:48 [DEBUG] train episode 1739: reward = -118.00, steps = 119\n",
      "23:03:48 [DEBUG] train episode 1740: reward = -157.00, steps = 158\n",
      "23:03:48 [DEBUG] train episode 1741: reward = -131.00, steps = 132\n",
      "23:03:49 [DEBUG] train episode 1742: reward = -124.00, steps = 125\n",
      "23:03:49 [DEBUG] train episode 1743: reward = -103.00, steps = 104\n",
      "23:03:49 [DEBUG] train episode 1744: reward = -153.00, steps = 154\n",
      "23:03:49 [DEBUG] train episode 1745: reward = -209.00, steps = 210\n",
      "23:03:49 [DEBUG] train episode 1746: reward = -135.00, steps = 136\n",
      "23:03:49 [DEBUG] train episode 1747: reward = -127.00, steps = 128\n",
      "23:03:50 [DEBUG] train episode 1748: reward = -216.00, steps = 217\n",
      "23:03:50 [DEBUG] train episode 1749: reward = -141.00, steps = 142\n",
      "23:03:50 [DEBUG] train episode 1750: reward = -144.00, steps = 145\n",
      "23:03:50 [DEBUG] train episode 1751: reward = -118.00, steps = 119\n",
      "23:03:50 [DEBUG] train episode 1752: reward = -112.00, steps = 113\n",
      "23:03:50 [DEBUG] train episode 1753: reward = -112.00, steps = 113\n",
      "23:03:50 [DEBUG] train episode 1754: reward = -195.00, steps = 196\n",
      "23:03:51 [DEBUG] train episode 1755: reward = -124.00, steps = 125\n",
      "23:03:51 [DEBUG] train episode 1756: reward = -146.00, steps = 147\n",
      "23:03:51 [DEBUG] train episode 1757: reward = -119.00, steps = 120\n",
      "23:03:51 [DEBUG] train episode 1758: reward = -137.00, steps = 138\n",
      "23:03:51 [DEBUG] train episode 1759: reward = -119.00, steps = 120\n",
      "23:03:51 [DEBUG] train episode 1760: reward = -159.00, steps = 160\n",
      "23:03:51 [DEBUG] train episode 1761: reward = -135.00, steps = 136\n",
      "23:03:51 [DEBUG] train episode 1762: reward = -99.00, steps = 100\n",
      "23:03:52 [DEBUG] train episode 1763: reward = -141.00, steps = 142\n",
      "23:03:52 [DEBUG] train episode 1764: reward = -177.00, steps = 178\n",
      "23:03:52 [DEBUG] train episode 1765: reward = -130.00, steps = 131\n",
      "23:03:52 [DEBUG] train episode 1766: reward = -103.00, steps = 104\n",
      "23:03:52 [DEBUG] train episode 1767: reward = -117.00, steps = 118\n",
      "23:03:52 [DEBUG] train episode 1768: reward = -160.00, steps = 161\n",
      "23:03:52 [DEBUG] train episode 1769: reward = -128.00, steps = 129\n",
      "23:03:53 [DEBUG] train episode 1770: reward = -104.00, steps = 105\n",
      "23:03:53 [DEBUG] train episode 1771: reward = -176.00, steps = 177\n",
      "23:03:53 [DEBUG] train episode 1772: reward = -115.00, steps = 116\n",
      "23:03:53 [DEBUG] train episode 1773: reward = -115.00, steps = 116\n",
      "23:03:53 [DEBUG] train episode 1774: reward = -140.00, steps = 141\n",
      "23:03:53 [DEBUG] train episode 1775: reward = -129.00, steps = 130\n",
      "23:03:53 [DEBUG] train episode 1776: reward = -120.00, steps = 121\n",
      "23:03:54 [DEBUG] train episode 1777: reward = -120.00, steps = 121\n",
      "23:03:54 [DEBUG] train episode 1778: reward = -112.00, steps = 113\n",
      "23:03:54 [DEBUG] train episode 1779: reward = -116.00, steps = 117\n",
      "23:03:54 [DEBUG] train episode 1780: reward = -120.00, steps = 121\n",
      "23:03:54 [DEBUG] train episode 1781: reward = -205.00, steps = 206\n",
      "23:03:54 [DEBUG] train episode 1782: reward = -112.00, steps = 113\n",
      "23:03:54 [DEBUG] train episode 1783: reward = -128.00, steps = 129\n",
      "23:03:55 [DEBUG] train episode 1784: reward = -137.00, steps = 138\n",
      "23:03:55 [DEBUG] train episode 1785: reward = -121.00, steps = 122\n",
      "23:03:55 [DEBUG] train episode 1786: reward = -138.00, steps = 139\n",
      "23:03:55 [DEBUG] train episode 1787: reward = -161.00, steps = 162\n",
      "23:03:55 [DEBUG] train episode 1788: reward = -129.00, steps = 130\n",
      "23:03:55 [DEBUG] train episode 1789: reward = -105.00, steps = 106\n",
      "23:03:55 [DEBUG] train episode 1790: reward = -121.00, steps = 122\n",
      "23:03:55 [DEBUG] train episode 1791: reward = -113.00, steps = 114\n",
      "23:03:56 [DEBUG] train episode 1792: reward = -151.00, steps = 152\n",
      "23:03:56 [DEBUG] train episode 1793: reward = -127.00, steps = 128\n",
      "23:03:56 [DEBUG] train episode 1794: reward = -147.00, steps = 148\n",
      "23:03:56 [DEBUG] train episode 1795: reward = -119.00, steps = 120\n",
      "23:03:56 [DEBUG] train episode 1796: reward = -111.00, steps = 112\n",
      "23:03:56 [DEBUG] train episode 1797: reward = -89.00, steps = 90\n",
      "23:03:56 [DEBUG] train episode 1798: reward = -96.00, steps = 97\n",
      "23:03:56 [INFO] ==== test ====\n",
      "23:03:56 [DEBUG] test episode 0: reward = -125.00, steps = 126\n",
      "23:03:57 [DEBUG] test episode 1: reward = -161.00, steps = 162\n",
      "23:03:57 [DEBUG] test episode 2: reward = -103.00, steps = 104\n",
      "23:03:57 [DEBUG] test episode 3: reward = -93.00, steps = 94\n",
      "23:03:57 [DEBUG] test episode 4: reward = -114.00, steps = 115\n",
      "23:03:57 [DEBUG] test episode 5: reward = -141.00, steps = 142\n",
      "23:03:57 [DEBUG] test episode 6: reward = -114.00, steps = 115\n",
      "23:03:57 [DEBUG] test episode 7: reward = -117.00, steps = 118\n",
      "23:03:57 [DEBUG] test episode 8: reward = -85.00, steps = 86\n",
      "23:03:57 [DEBUG] test episode 9: reward = -141.00, steps = 142\n",
      "23:03:57 [DEBUG] test episode 10: reward = -144.00, steps = 145\n",
      "23:03:57 [DEBUG] test episode 11: reward = -105.00, steps = 106\n",
      "23:03:57 [DEBUG] test episode 12: reward = -105.00, steps = 106\n",
      "23:03:58 [DEBUG] test episode 13: reward = -111.00, steps = 112\n",
      "23:03:58 [DEBUG] test episode 14: reward = -113.00, steps = 114\n",
      "23:03:58 [DEBUG] test episode 15: reward = -124.00, steps = 125\n",
      "23:03:58 [DEBUG] test episode 16: reward = -111.00, steps = 112\n",
      "23:03:58 [DEBUG] test episode 17: reward = -90.00, steps = 91\n",
      "23:03:58 [DEBUG] test episode 18: reward = -96.00, steps = 97\n",
      "23:03:58 [DEBUG] test episode 19: reward = -112.00, steps = 113\n",
      "23:03:58 [DEBUG] test episode 20: reward = -262.00, steps = 263\n",
      "23:03:58 [DEBUG] test episode 21: reward = -118.00, steps = 119\n",
      "23:03:58 [DEBUG] test episode 22: reward = -162.00, steps = 163\n",
      "23:03:59 [DEBUG] test episode 23: reward = -149.00, steps = 150\n",
      "23:03:59 [DEBUG] test episode 24: reward = -134.00, steps = 135\n",
      "23:03:59 [DEBUG] test episode 25: reward = -116.00, steps = 117\n",
      "23:03:59 [DEBUG] test episode 26: reward = -104.00, steps = 105\n",
      "23:03:59 [DEBUG] test episode 27: reward = -106.00, steps = 107\n",
      "23:03:59 [DEBUG] test episode 28: reward = -121.00, steps = 122\n",
      "23:03:59 [DEBUG] test episode 29: reward = -114.00, steps = 115\n",
      "23:03:59 [DEBUG] test episode 30: reward = -142.00, steps = 143\n",
      "23:03:59 [DEBUG] test episode 31: reward = -174.00, steps = 175\n",
      "23:03:59 [DEBUG] test episode 32: reward = -108.00, steps = 109\n",
      "23:03:59 [DEBUG] test episode 33: reward = -128.00, steps = 129\n",
      "23:04:00 [DEBUG] test episode 34: reward = -112.00, steps = 113\n",
      "23:04:00 [DEBUG] test episode 35: reward = -119.00, steps = 120\n",
      "23:04:00 [DEBUG] test episode 36: reward = -123.00, steps = 124\n",
      "23:04:00 [DEBUG] test episode 37: reward = -128.00, steps = 129\n",
      "23:04:00 [DEBUG] test episode 38: reward = -103.00, steps = 104\n",
      "23:04:00 [DEBUG] test episode 39: reward = -108.00, steps = 109\n",
      "23:04:00 [DEBUG] test episode 40: reward = -94.00, steps = 95\n",
      "23:04:00 [DEBUG] test episode 41: reward = -139.00, steps = 140\n",
      "23:04:00 [DEBUG] test episode 42: reward = -142.00, steps = 143\n",
      "23:04:00 [DEBUG] test episode 43: reward = -130.00, steps = 131\n",
      "23:04:00 [DEBUG] test episode 44: reward = -94.00, steps = 95\n",
      "23:04:01 [DEBUG] test episode 45: reward = -128.00, steps = 129\n",
      "23:04:01 [DEBUG] test episode 46: reward = -143.00, steps = 144\n",
      "23:04:01 [DEBUG] test episode 47: reward = -113.00, steps = 114\n",
      "23:04:01 [DEBUG] test episode 48: reward = -134.00, steps = 135\n",
      "23:04:01 [DEBUG] test episode 49: reward = -153.00, steps = 154\n",
      "23:04:01 [DEBUG] test episode 50: reward = -101.00, steps = 102\n",
      "23:04:01 [DEBUG] test episode 51: reward = -149.00, steps = 150\n",
      "23:04:01 [DEBUG] test episode 52: reward = -140.00, steps = 141\n",
      "23:04:01 [DEBUG] test episode 53: reward = -90.00, steps = 91\n",
      "23:04:01 [DEBUG] test episode 54: reward = -129.00, steps = 130\n",
      "23:04:01 [DEBUG] test episode 55: reward = -120.00, steps = 121\n",
      "23:04:02 [DEBUG] test episode 56: reward = -128.00, steps = 129\n",
      "23:04:02 [DEBUG] test episode 57: reward = -91.00, steps = 92\n",
      "23:04:02 [DEBUG] test episode 58: reward = -103.00, steps = 104\n",
      "23:04:02 [DEBUG] test episode 59: reward = -109.00, steps = 110\n",
      "23:04:02 [DEBUG] test episode 60: reward = -102.00, steps = 103\n",
      "23:04:02 [DEBUG] test episode 61: reward = -124.00, steps = 125\n",
      "23:04:02 [DEBUG] test episode 62: reward = -124.00, steps = 125\n",
      "23:04:02 [DEBUG] test episode 63: reward = -105.00, steps = 106\n",
      "23:04:02 [DEBUG] test episode 64: reward = -125.00, steps = 126\n",
      "23:04:02 [DEBUG] test episode 65: reward = -124.00, steps = 125\n",
      "23:04:02 [DEBUG] test episode 66: reward = -96.00, steps = 97\n",
      "23:04:02 [DEBUG] test episode 67: reward = -128.00, steps = 129\n",
      "23:04:03 [DEBUG] test episode 68: reward = -121.00, steps = 122\n",
      "23:04:03 [DEBUG] test episode 69: reward = -152.00, steps = 153\n",
      "23:04:03 [DEBUG] test episode 70: reward = -151.00, steps = 152\n",
      "23:04:03 [DEBUG] test episode 71: reward = -104.00, steps = 105\n",
      "23:04:03 [DEBUG] test episode 72: reward = -123.00, steps = 124\n",
      "23:04:03 [DEBUG] test episode 73: reward = -98.00, steps = 99\n",
      "23:04:03 [DEBUG] test episode 74: reward = -95.00, steps = 96\n",
      "23:04:03 [DEBUG] test episode 75: reward = -122.00, steps = 123\n",
      "23:04:03 [DEBUG] test episode 76: reward = -135.00, steps = 136\n",
      "23:04:03 [DEBUG] test episode 77: reward = -148.00, steps = 149\n",
      "23:04:03 [DEBUG] test episode 78: reward = -124.00, steps = 125\n",
      "23:04:04 [DEBUG] test episode 79: reward = -125.00, steps = 126\n",
      "23:04:04 [DEBUG] test episode 80: reward = -174.00, steps = 175\n",
      "23:04:04 [DEBUG] test episode 81: reward = -116.00, steps = 117\n",
      "23:04:04 [DEBUG] test episode 82: reward = -103.00, steps = 104\n",
      "23:04:04 [DEBUG] test episode 83: reward = -115.00, steps = 116\n",
      "23:04:04 [DEBUG] test episode 84: reward = -110.00, steps = 111\n",
      "23:04:04 [DEBUG] test episode 85: reward = -123.00, steps = 124\n",
      "23:04:04 [DEBUG] test episode 86: reward = -139.00, steps = 140\n",
      "23:04:04 [DEBUG] test episode 87: reward = -131.00, steps = 132\n",
      "23:04:04 [DEBUG] test episode 88: reward = -101.00, steps = 102\n",
      "23:04:04 [DEBUG] test episode 89: reward = -128.00, steps = 129\n",
      "23:04:05 [DEBUG] test episode 90: reward = -141.00, steps = 142\n",
      "23:04:05 [DEBUG] test episode 91: reward = -180.00, steps = 181\n",
      "23:04:05 [DEBUG] test episode 92: reward = -120.00, steps = 121\n",
      "23:04:05 [DEBUG] test episode 93: reward = -129.00, steps = 130\n",
      "23:04:05 [DEBUG] test episode 94: reward = -114.00, steps = 115\n",
      "23:04:05 [DEBUG] test episode 95: reward = -97.00, steps = 98\n",
      "23:04:05 [DEBUG] test episode 96: reward = -138.00, steps = 139\n",
      "23:04:05 [DEBUG] test episode 97: reward = -126.00, steps = 127\n",
      "23:04:05 [DEBUG] test episode 98: reward = -169.00, steps = 170\n",
      "23:04:05 [DEBUG] test episode 99: reward = -133.00, steps = 134\n",
      "23:04:05 [INFO] average episode reward = -123.79 ± 24.41\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD4CAYAAAAEhuazAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO2dd5gV5fXHv2c7bel9wQVEkCZNSkRERWkWxIbR6E+jJEaNKZqAGCQxCJbEkogtMUrUIDEiKAJSRKUoLr3DIkvvnQW2vr8/7szduXNn7vSZW87neXi4O+Wdc+fOfOfMec97XhJCgGEYhkkt0oI2gGEYhvEfFn+GYZgUhMWfYRgmBWHxZxiGSUFY/BmGYVKQjKANMEuDBg1Efn5+0GYwDMMkDA0aNMDcuXPnCiEGq9cljPjn5+ejoKAgaDMYhmESCiJqoLWcwz4MwzApCIs/wzBMCsLizzAMk4Kw+DMMw6QgLP4MwzApCIs/wzBMCsLizzAMk4Kw+DMMw8QphYfOYNn2o560nTCDvBiGYVKNgX/9CgBQNGmY622z588wDJOCOBJ/IrqViDYQUSUR9VStG0NEhUS0hYgGKZb3IKJ10rpXiIic2MAwDMNYx6nnvx7ACABfKxcSUQcAIwF0BDAYwGQiSpdWvwZgFIC20r+ogkMMwzBMFSXlFa636Uj8hRCbhBBbNFbdCGCqEKJECLEDQCGAXkTUFECuEGKZCE0ePAXAcCc2MAzDJDordx3HP775IWp58zrVAABpHgRIvOrwbQ7gW8Xfe6RlZdJn9XKGYZiUZcTkpQCA+y9vHbG8dcMaaJSbjcx097tnDVskovlEtF7j342xdtNYJmIs1zv2KCIqIKKCw4cPG5nKML4za+1+TF5UGLQZTJyx98Q5nCkpj7nNlGVFmFawO2LZ5EWFCAVFQlRUCmSkedMtauj5CyEG2mh3D4AWir/zAOyTludpLNc79psA3gSAnj176j4kGCYoHvpgJQDgFwMuDNgSJp64bNJCtG1UE/N+c4XuNuNmbAAA3NazSiqfm7MF9apnYWSvlvj1h6uxdPtR9G1d3xMbvUr1nAlgJBFlE1ErhDp2lwsh9gM4TUR9pCyfuwHM8MgGhmESiO9+OIrKSvd8vNW7T+BcqfsdpWbZduiMrf027T8FIQSmr9oLAMhI98bzd5rqeRMR7QHQF8AsIpoLAEKIDQCmAdgIYA6Ah4QQ8q/wIIB/INQJvB3AbCc2MMGy7eBp7D52NmgzmARGCIFxM9bj9je/xbvLilxp88iZEgx/dQke+2iNpf2OF5di9e4TrthgFyJCheIhmO5R2Mdpts90IUSeECJbCNFYCDFIsW6CEKKNEKKdEGK2YnmBEKKTtO5hoQxwMQnHNS9+jcuf+zJoM5iAmb/xIM6X2fOyP193AFOW7QQQKmfgBrLHP3vdfrR7cjY27T9lar8BLyzC8FeX4KX5W8PLKioF/vzZRhw8dR5A6GE1Z/0BnCutwLyNBx3b+l9V3H/93pN465sd4b/TPRoKxSN8mbhl1tr9OFsau9OMCZ5Vu47j/ikFmDBrk6nt/1uwGx3HzUFZRSUA4FhxSXhdhUthH9lbrhRASXkl3vt2p6n9Tp4rAwC8NH8bvv0hVFPn7cU78I/FO9D7mQUAgJlr9uHn763AxePm4IEpBRg3Y73tBx8APP7R2oi/C3Yex7NzNof/zslMV+/iCiz+jGucKSlH/uhZmL5qj/HGBqzbcxIPfbAST05f74Jl+nR/eh7Gz9zg6TESgfV7T2LFzmO29pUFs+hose42s9ftD3vOYz5eh+LSCpSUh8T/DzOqzn+5S+If6yGyYucx5I+ehaIj+vYCwImzoe+198S58LL80bPwt4WR2V1Tlu3Ec3O0hjuFOFNSjo9W7IHdIEf1LBZ/35mzfj9+OOzOa2iisnbPCeSPnoX1e08abrtPukle/XK74+PKaXLK
G08LIQT+vawIp86XhZedK61A/uhZeHdpkeFxjhWX4h2D7Ub/by3yR88ybMsqU5fvQv7oWRG2e80bX21H/uhZWLztCFbuOh5eft3fFuPm15aFBdoKRhVaSssr8eD7K/Hjt0JDf2SB1xJDtzp8Y4n/RytCHalLth8x1Valyk6t0NTbS3agslLgeHFp1LUybsZ6PPbfNXjz6x/w7tIiCCEsnWcW/wD4+XsrcdVfvgrajECRY5oLNh0y3FaWAKseTkWlwGuLtmtmZhi19N2OY/jDjA0Y90nVG8JRKYzw5tfRIyb1KK+o1LV76ve7NZc75Z+LQ3HdAyetC65dZK/1rn9+Fx5YpOSuf3xnuc2q3117fXllyMNXP8i19Fn5YKioFLj/3e81H7wVlQKTFxXqhgXlY8q8/90uVFYK/OWLLfjP8l1R9pZVVOLVL7XHa5RVmLueyysFftB4mzh8OnQ9Tpy9GU/N3ICBf/0qHEIyQw6Lf3IyfdUerFJ4YPHAip3HMHNNaPhF+MY2lGFAdgCt+m7TV+3Fs3M2R3SyUdWBYyI/MI6fNfaeyysqdWukXDh2Nl6cFzr+7HX7w/HeZEPtxarZZZC5VVEpouLbVb+7dtuyFx5VokBjc3nbCbM2oc0Tn2O+jtPx2dp9eG7OFrwwd6vmeq3w0fDJSyJCNsotphXsxvNzI0M3p86V4S9fbME5k/1OZRWVKFGdGyFE1JvR9sOxw01qsjwY3Quw+AfClGVF4XSyX3+4BjdpeGBBcvNry/DL/6wK/SFduOaceXtZCeekG+a0YkSk3NLyomMx3yRkMTOTDXfT5KVo9+Qc3fUfSB7hg++vxMg3v9XdDgiNxHQrM8UPikvKcb6swlD85Ti8mspKgRNnS/GL91eg/R8iz6Ec819SqP3ALJc857OlFdh68HRVmxq2yB7720t2RK0DgNPny1BaXhl+6BfrjKIt1/DW1+5RhS4Vx9d66xz7yTr8bWEhFm42fusFQuL/Y9Wb0zfbjuDrrc6qE3hR1wfgyVwCQR7Z58UEDW5j67Kz6PrLx1i46RCm5e3GbZe2iFh/6HQJGufmaO4rO3hGudCb9p/COsN+i8g2ikvKUSM7+hY5V1qB5+ZswVsWwkpB0/GpubigfnWcL9MWdzXHikvxh0/W49lbuqBmdgZenL81qqNzwaaD+G7HsYjw2oGT59GoVjbSFL+H0gu/4e+Lw5+1xF+IUB+BXsi+8/gv0OOCurilR572BhrH1OOv87aiVk4mbuzaTLNvSQ73ZGWkAzD2/rXCQ7//31qNLa0Rl3n+TPwwrWA3Zqze61n7ZvTcbthH5sCp8/iddLMoX5WFAIqOFGPUlIKozlFZQIw6HZXxbb20PPU91kcVl5W9TDm8YVZIZdbvPYkJszbazvowy6tfFmJpYXRn5s6j5gbjCSEw8fNNmLVuPzo9NRcAMHv9gajtfvpuQVS/Sp+JC/DKwm0Ry5Txd+WbhV6I6dGpq6KWvfFVVRLBip3Hw0774TMl+N1Ha6LCeeUVxr/N8bNl+NWHqzF+5gb8a0mR7nalJsspl2kcc78L/Tlmki3swOKvg5vDzP3gdx+txaNTV7vebviV04RgGXX4Hjx1HkfPVOV0T/x8k+4gGbUQPzp1Fb7YeBBdxn+BF+dtDfdJCI2wj/qVv7JShENLAMLD5qOPGXnQ06qQgvqV3ioj3/wWb32zA8UmSg6Mnb4OS01mo6h5fu4WR7ZWVIooz9mK7/nNtki7lb+H8hz/dlr06FsB7QfNxNmbIzp+5Qfwws2HMK1gT9R1ZCVl9KMVsVOT2zfNNdXOjNW6ZcocscMgJdUuLP46VAQw8HjHkWLc+vpSnPYo9e/omRLN3ObKSoEHphRoio2WN3/6fBlufX1pRFvnyyqwaf/pqG2V9H5mAXr8eX747ze+/gEPTCnQ3FbtyCvDCC8v2Bbuk5DvcaWoTJwdGmy0N5x6Ghmu0BOyA6fOR6Q/ApFOwBqHw/5lE80MZHr/u134
8VvOHjZ2Ka8UUSEZK3Fn9ZZKIVaOVhWInqTE7IhZo5o9WjF/PdIMwipaHr0WyoFZbsJhH5+xMtKw6Eixrfo2f/kiMrvghblb8H3RcXzlsINIj8ueXYgBLyzCudKKCJE7U1qOeRsPYtSUFbr7KrVg4eZD+L7oOP4yryrT4rfT1oQrXFp9bhptfqy4FNt1OlerOnyrbpAVOyMFXO2JxkKd/qj2/pWYyYBSIt/ExSXlGPLyN6YKfx06dT6ik9QPQuIfuUyt/Xe/vVx3f/WDQhmCUa7acaQYT3xsbxDfn1WjiRduPoQRk5fg4KnzeGfJDpRVmg/JnT4fO56/alewtX686vBl8dfBivgPeGGRrfo26g60Wev2A/Dux5Zj1I99tAYjJi/FoXCtEv19tCyRRaxCcYPJtgPWRfFLzWyKqiMPfeUbnNK5QeWfSXnKjpwpjdhGzkYJt+zw9Gqdr+5Pz8OhU+cx7fvd+Mk/tT122etdu+eE6VozvZ5ZgGtf/Np4Q5vsOno26o2mokJE/LZA9DUZM4OFQuG4BZsOorJSRHSEqrOJvtgQHeKxw8cr92LlrhPo/cwCjP90I2bohPYSkTSPVJqzfXQIIuwj49FbXpgNUgfSmZJy3P/3xbgkr47utlo53A9/EAq52K3DcvVfFuGOXi3Df2ul0pkVaKHh+Udto3oYEeT0VRP2a2yyXWPU97HiUszbdBBjpXIUZ0vLUT0rA6t2HUf9GtloWb96OLxgNGjIjF1fbz2Mzs1ro26NLOPvEIP+z0c7LeWVlRFhk1Pnyyw9MNMI+HTtfvzyP6sw/voOaNOopu62Hvk52GGyczsRYM/fZyosxAzdR/vH3nfinCshIWVmzNo9J/FvqeiV1lEpRp6/XihUua0QAm2e+BzvKPK2tx8ujnptjzpuzLUh8kfPwlIpt3xx4RE88p9V6PfswohtNu7T8LClxs08u9QjRQHghr8v0dyWFFbLoYSbJi9F/+e/hBAi/FA3Gh+gZde2g6eRP3oW5m08iPzRs3D328tx7zvfx2xn7Z4TqKgUaD3GWmmKikoR8WA/W1JhSYDSiMId+zuOFONYcanutkZZWnZx2j8TT3h1jtjz1yEePf8hL3+Dk+fKPBsfoP7G01ftCd9E8rp1ioEyegOGlIvLJSF52mTFRxmzA98+lMrhHisuxadrorMtTp8v0w1rmXlziVUfSJ3q+cT0deHPIyYvRdeWVW9U9/zrexw8FRLElxdEpkICIU++YOdx3NW7JeppePNyWO2x/1ZlyGi9gVRG5NQvwaY/DTb1kFNyuqQcCxRvY+fLKiy9jS7dfhRLt4ceyt8XHce7y/QraqpDckw0Hs3lwp6/Hm6VlrWDlne/+cAp124UOXVM/Q3lqpwyv/5wDb6Qsi9kAb1eMUgn1sjHEZOX4PLnFobPo8eRLF0EomdUkm0xGu0KACdU5/zfy4pMHXfviXOYtbaqHyRWjFyIUAfqKwu2odczC7D7ePSAI/lyzMqIfcuqnRar/S9AKAVXyfnyCtvxmY0m+zYShUEdG+O9n/b29Zgc9vEZM8LgFe9/tytq2b8WF7l+nN9o5FnrYVVEVu46gd3HzunXdQmQquJhxttmquqqKMsPu4X63N76evRbj9wPoKzzIp9RZbqk2mmxUxJbXU9n8EvfYOM+bwYaJRrpaYSOzczl/buFUSqq7XY9aZVxnQ8LrFWWLKuoxPNzN+N4canuiFYrcdE3vvrB9MhU5Xay0Aal/Vomj/l4Hf61ZIep0J5XoyuVnFXlrKuzlYCq76E8twKhsuPtnpyDLQdC6aDqt8OPXcp6MVvZMtlJI/LdkfntNRd50i6Lvw5eOf5uhZOEEOFSsVp8smovXv1yO7o9PS+qEJddzBYyU37DoD1/vTeWP366EcdjdETKfLfD3gQnVtAqrawmPEBMcWGePl+ORVtC4aRbX1+Kqct3WSoVzFgnPY1APqvmJS30s/GcwOLvM+qysXaZVrAbl06Yjw06r+Nu
zYik5H8rzXmRygfnjiOhB4YV7ZfrrbtCjNOQSHMPyw9P9c8qT/F36nw5Rn+8Tr0b4zLpaeTZnLpaZGekIdugn8cunO2jg1cvuYu2mCsPa4ScTTF50XZ0alY7ar0XYcLXv7I+Q9f2Q6HOZXVoIxZjXBSxfyzWLg2caMh6o37bq+bRRB+MNuk+h31eHtmNUz2ThVo57pxy+XKYtXZ/RFbJqfNlOHamNCLn3G+UoZZzDia2dgOztdgTlZwMFn8j6tXIijnWwArpaaT5FlstM92Ta93LCrAc9vEZvVS9I2f04/da6Hkfw19dggEvLAqsgxWIDPto5d4z7pHhVRJ4ElG3eqZrbaWlkWahtV8NbOvaMZR42c3O4q+DV09cvQ7fvhPNd9TN23hQN4vjB2mKuMc/ipxEQl3szEuU37DAx+MmM8t1Op4zvK4FkuCs/+MgV8MmemEfr0Tay9+Xxd8id7+9PFyN8+S5Ms3JpWOh1w9rJZXu+bnWS8cePOXfJOEBDpFIWuQ+HjUTZ3tTRjhZqJmd4WoANDM9zfPaWzLdWtbB1Rc39qx9Fn8dlAL24HtVpY6/3noYf1tYiNveWIa1e4zz5N/7die6/ukLRbvmlPFQDLG2k3O9OolqnTCMFdwMgWZnpnnWAavmkasu9KyWP8Dib4rZ6w/gxNnIDqPlO46Z6lR98pP1OHG2LCz6ZjMwez2zAGd0asnbmdlHPd2el8TRYF6GcRW9tEuzb7sXxqhwardNu7D4m0QuY6zEisjJsX4rZSOeSNC8bQ77MG4zcURn09s2r1Mt4m83M9+ydbKrzJY/ubp9I9dscQqLv0kWFx6JCtlYEX/Z47cy9mr/yegCX4kBqz/jLsr5H4yorhr7YOdNNDOdsPXPQ6KWm/H8tcox3Ni1WWj/zPhJzWXxt4A8R62MnkehVUtHLo1gJYsoweaQD6NVm4Zh/KJJ7ZyIv9Ux+l759QzbICLNtOzsTGPJ1CrEJt/LOSb294v4sSTO0NLodXsjO021PIoVO4+j/R/m4MsthyIeAkNf+QaAtbBPkJVFGSZRKa8QeHBAG/y0XyvN9WY6Uf92RzcAwHO3dIlYnpVuLJlauiDfy3phoyBg8bfA2j2RdXS0LqGVUl77xyv34soXFkWttzCvdMJ6/gwTJBVC4PeD2+MP13UAEH2fGs2JWys7A4M6NgEA3Nw9L2KdXthG+UavOQBTWl3NRNjHr7cDFn8dtDpw1HX2tVK+3vg6VP/m0zX7sP9kdLqmJc+f1Z9hLHNpft2Iv3u1igzzWKnNo35LMFNkTevFQr7vYwn7Z4/0w46JQ/GjNg1M2+cEFn8HaOX5G8W7rURyOOzDJCq5FmtYVctMx5PDLnbl2L+5pl3E349cdWHE34bir1r90JVtwp/NdPhqtS/fy7FCTp2a1/ZtDAHA4u8Io0nItbAi6EFOJckwTmimSrc0YkT35rirzwWuHFstsEZ/R6G67R4f1B7dpPmY9WpzKXfREvC8utUBhMqv9LvQH8/eCBZ/Hew43St3GdexsSL+7Pgz8cSC317hWdtezpClFuOBNkomyCFY9bSeMsp7VetbXH9JKNXz4qa5aFgr2/LxvYDF30X++OlGw22sOPPbDp3Gp2v2YfxM9+eNZZKfwVKnpVvUyo4dynn6xo7hz1bDF0TezEEBVLVbLTMd68Zfizt6tTAwJnqRfNuasVFrm64t6mDt+GsxuFOTAIutR8Lir4MdpzvTxJWx94T5gVuVAnjkP6vwztIiG9Ywqc7rP+nhantG4ZIaioeDVYHz0vNXtlsrJ9NhXF17X2WCSK9W9TW3yc1xr7S0G7D4u4hRbfX80bNQWm4h15Nh4ggrRcbU+trIINRB5F1NKDfalcM6eqdAXv/Lq9uiQ7Nc5wf0AUfiT0TPE9FmIlpLRNOJqI5i3RgiKiSiLUQ0SLG8BxGtk9a9Qn52b1vATj1/vXggwyQDVm7VvLqRHb5Gd1Maka+ZLlaRPXvZRnWNHvn7yd/g
3svyTeX0B4lTtZoHoJMQoguArQDGAAARdQAwEkBHAIMBTCYi+Uy8BmAUgLbSv8EObYgbvCy/yjBBY6TNSn/phVsv0V2nRbzfOmrPv/9FDSPWyx3CsgY8dX1HbHpaR9o0vqsyndQvHIm/EOILIYRcd/hbAPJwuBsBTBVClAghdgAoBNCLiJoCyBVCLBMh13oKgOFObIgnMoyGDjK+0dZC6dyRlxp0ACI0AMcuZkoCmGG4VBwsKKzoc62cTFySV1uxJFL91WLvpdefnZGOBjWz8OfhnRy3pVfPS87is/sQe3xQe7sm2cZNtboPwGzpc3MAuxXr9kjLmkuf1cvjDlsdvjyfatzw3v29TW876eYuhpkxNQ0yXWLi0mXx0shu7jRkEycCrfb81Z27XkZ80tMIBU9eg5t7VJVqmP6LH0Vs8+uBVZU4tUyR7dezMxz2cfGLeJ3qbXhFE9F8AFp3xlghxAxpm7EAygG8L++msb2IsVzv2KMQChGhZUvzJV2DIoNj/nGD1VvQy9HU8R7SMIvlr6EQQvXZVWukV5k+enRrWdd4oxiozZXDPn5/DycYqpUQYqAQopPGP1n47wFwHYA7RVUv6R4AynfpPAD7pOV5Gsv1jv2mEKKnEKJnw4YN9TbzBB5gFTxmwjFmSSOgT+t6GNW/teZ6o5/byT3txmQitasFnybo5BxEzYWhOifx8ICc8dBluuuMro+q8g3O7fDrVDjN9hkM4PcAbhBCnFWsmglgJBFlE1ErhDp2lwsh9gM4TUR9pCyfuwHMcGJDPPHpGt3nGGODXw2MnhTDLNVVYZqczHRMHdUXY4Zox1aNHvZmBHz52KtR8OTAqOVuCJveQ8tPnDzEjDx/N2fbskt+/Rq664SI9OzV1lZIGdxmPH+j7zr+ho4Y0qkJ+rX1tgyE0+fU3wHUAjCPiFYT0esAIITYAGAagI0A5gB4SAghF7d/EMA/EOoE3o6qfgKGicBJH4pejF4vJmuU2mvG621UKwcNakbns6uPGe8pgHoYZvvEWqda+cur24Y/39IjD6OuCP7hJk/UMriTfv+P3jmoFObDPkZTPraoVx2v3dUDOR5fJw56sQAhxIUx1k0AMEFjeQEA593uHnOsmGejUtOpeS7W7z2lu57I3XCZVkEuu8XujOzyMsqn1oM61TNx7mT0bG/JjPrhqgxjqdNCzfL08E74wyfrHdmlJCczHd+PHYg61aNDbEbXj51sn18MaIPJi7ZbMdFVuIdSh9veWBa0CQmH2/0kao85Xfpb6+ZU8suron0SI2/Ljuc/8GJzk3Grd03U/iQnMf/bekb237jRMWq1bLQZGtbKjjlYM2y2yn4zJZvDbUhXRKsG+mEmP2DxZ+IW9X3UuLa5aohXtItODlAK7j/v6Rm13s4LxQ1dq7KUJ9/ZXXc79Zyu6ofXl48NiNrntRjtAcZF1rzAblz+gwd644mhkbX64y0pxtA5MFgfvn4sfLGgfQAWfyZuUXv+daplhZbH2Kdjs1z0uCB6gm7ljaYVczfO9ok+qnLJj9pUFfO6pUde1LZK/nXvpREDjqpnRdszpHNT1fEj1898pB+ev6WLp1kyo/q3xrM3d9a14Ydnhppqp1pmetQDUD3bVqKg9wAUMcI+RmNIbusZ+3rxChZ/DTbsO2m8URJxv85E12rcysjYrDfsPep4OstjeFcZZl67Nfa3U8spYgIPhbUv3HoJiiYN092vae1qticu6d2qHto2qolWDWrg1p4tcEGMDBUA+OShy3Bdl6Yxt9HjiaEXR8xhqz5rakHXQ31md0wcigsb1bJlU1CoB3mpv3m9GiHHRHZQZIomDYtZXbVo0jA8d4u9Pg+nsPhrcPp8ufFGSYRXr+BNcnMARJclMHs8O3bl1auuuby1Ir6qOb+203fwGLa6EeOXHy4f/qwv5v2malKVqaP6xNyva4s6juaEVT4ojUavRufyG7dpha8eH4B37r3U1r5OURduU/PLq9vi2Zs7Y2hn4zkU
4iXk5X/gkAmM7Iw0lGiUlB7RPQ9vfbPD9ePNePgybD14Gpe3bYhPVleNgTDb2WdVNF+/qzv6tdUeDKgs96B19J9f0QaLC49YOp6yHa9vaL32G+fmoGGtbBw+XWJ5X1PHVXx2O8T02SP9LBVDvKB+DcM3HbsYvR1FPdhUZmdnpOP2S+O/CoES9vwtoDd/Z6KgNXfof3/eFxc3Na4/Pu/X/S2LSOPcHFyuIcZmxd9qldTBnZrq5vcr8++1whX92jaIGarRCgtFhn30sRNSsoLxADV3MOOxD+/azHimLIlOzWubuvb0cOu0Fk0aZjoMFS9euxsktpp5QEWlQHGJdtgnHoagu43Zr9S2cS3XRMTMeWxQM8uzQS5e/I5awvjksIs1tvQfK4L18siutvclIrw0shsmjugSsdyOSMd6ED93cxf0bhXdqe8l6q9wbYcmaFY7x1cb3IbFX8XTn23ET98t0FwXD0PQzdKtZZ2oZUGnlsmY8SC7Oyy8ZWCB4xbU6ZpaLd53WSv8qE19/P3HsdM2zeDEYivXbX/Vm5reb/X141dGLTMKjZjhjl4t8SfFXMBa3HZpC3z4s77WG3dC+KuFvlTDWtlYOuZql9oMBhZ/FZ+s3qu7Llle+ZR57vH+nbRy4J3iludv1ExaGuGDB/qg/0UNozzqeMVsSK5lfe2OdadMHNEZd/fND9QGLWyk8esSL7cci7+KZJ6KUTv2HLoU41WcWmlk6TiNoTfOtf66rvSAR3Rvjn/f19uS49a3jfak3lq0b+JyGqSB2vS4oOotiyxe/sufuBoDL25swyjndG9ZF/N/0x9dW0S/5bqNfM25Idzx8gaevEpnE7dmXfKLITpFqGJdpFrey41d43JOHUfU0Bg8BQDN6lTDktFXmWpDK647YXhndI6YpcrYI7QSenng8ugiZ25l7GjxvwerJjaxWnahUW5OuE6PWtT8KGNxYaNavt6z8TzPsFUSS+l8IFZGTzz+7K/cYX52J02/38qXcuHCX/PUtY7bMMv3GuWVZZrXqYYNfxyEDX8cZKot5duG3kAfuzhvxz2VtRMSC1oPMzO8N8DN51i86Ajn+atItKkY7YSplF6oJe23fKRozE5K4sbNVj0r9uVdw0R9HLOenpFnr/MrSV8AABnzSURBVNXMxBGd0bFZblTqqdtiasVbtVNwTc/D9+uhIN8Db/ykR1SHtdu4+ZWM6gV5DYu/iliTsFfLSkdxaeKW4k3UapLJgJZo3NFLe1CQlmg6yTRLLHfGOkoHqJpOqM8p9WpkYefRs8hIMOcwFhz2URHLW8lMT8OgjsF0brmK4ju6GcN0861Jq6Xkue2sc5nGAD2zWPmJ7Xj+N0jlO5Qdx34ix/zLKqJHr7vFG3f1wISbOiGvrn8ZRl7D4q8ilndMBuvjHcfla2LowuCOTTBfUXOmpU6NnWTFaPS3lYes2ssvmjQMHZrpj4Q1HOFraaCW+W1lrrioIYomDUObhjV1tvD2pmlWJ9Qpr1Ud1S0a5ebgzt72ivGpCbqPRIbF3yI2J5JyhR9ZSBdU84licuqImjQO7FEysENjz+quBIlWWqn65h3WualhKYog7/c+rc1fN25MsiLjtCW9sJiaxwa1w/O3dMGV7cxNrsOEYPG3THDqn+9g5p92jWuZEjK7eF2/xm9G9W/tS/64Gxid+aa1qxlODiNjtZ6Sl0wc0dl4I4SKqt3as0VSpWH6AXf4WqBT89q255B1g3HXdcC1HRrj//71vaN27N4kMYuX2TMlbpFnnpq38SCemrEejWpV5ftHdb6aOJ12TvncX/VHtkvFBK14/0xqwOKvIpaIvXh7VxQdLcaCzYd8s0dJTmY6Bph8tVULvJ74WMkiifnQsKj+Xz0+AP9ethP/WGy9lLSfD5prOjTGNR0MOvlNGGTtPIf+b+fiSN+6NbKMN2J8JeiXZQ77WKBGdgY6Nqvt/vB7BXN+dbnm8vcV9ejtEp6NyHFLGm1blOQL6tdwrTbL8K7N
8EaM2ZLcJjzIi6MMpgha5OKNeCkQyeIfZ7Rvop3Voa4iaRWiqniuMnQVS8AKYoyQVePlDT7uug546voOuutfGtkNgwzmSfUCS985Du73315zkamZptyCY/DaBD24S4bDPnHEiO769XXc8BZyMkPP+pJycwPVlBOgGGHncjYrnvdJcwwrZwOLKzyK+ZvFbGf7I1e3xZebD+HzdQe8M0ZBsiUBaDH5zu44dOp80GbYgj1/FWYuWK+u6XtMlrJdPe4ay20TCNkZoTzo82XuDIZRztjkZSgs3ojS8eTXOEck8wvA0M5N8X+XtbK0D4d9EhivXtti5VgrV9WpnoULG+kNqNHfX/b8z5fZK1Ghtu7X11wU/tzNxuQrdj3DRHQojW73sUPjY9YvL0jE38sPgj4tHPaJI2J5SOp1sx+93FLaKQEY3KkJphXsiSpHbNe+9IBcuqA9yahYtqmwT+yN5AdzvHL9Jc0s78Mx//gmvq+4OCU9RvE3K6hrocQUf5XCZKanxZzjVqupq9o3xrYJQ9CxmT3xv7Vn5MTcTgcEBe35uEbAX0Q+/J+Hd8J/HujjevuFE4bg5dutT/ZzW888AAiP/M5KT0PPgOr/MNGw52+Dug4zb2Qy1KV8Y7iQVp0otR7JXphcAfHiprnYtP+U7v6Nc6M7e2/r2QK39WyB/NGzItpMVSzVzHGzMR0GtGvoSeGxDJuTpdx+aUvcfmlViYatE4a4ZVJCEy+3DXv+NvjZFW0sbX9TN3OzZMX2/J1hdf/Hrm1nuI1jzz+Gx/zj3ubqugSB/K2txLK9zfaRj2Em/uSdHYw5GtUKOVZm57bwCvb8bXDFRdYmjHjx9q6Yvkp/YngZsx2+ZlBv7oX4eBXzL5o0THO5HdFNRLwov5HIaE2lmcg8fFVbtGpYQ3cKVr9g8VdwpqQcmw+cjljW/6KG+HrrYU+OF9VvKP1dMzsDZ0rK0bR2DvaflHOI/b21zQhQvLy+usl9l7XCtkOnY25jZ4SvH+l9fv0efs6ZO3VUH7RumFzVYrMy0nBTt7ygzWDxVzLx801Ry4Z0auKZ+KuRoyjyPTz31/0x7JVvsPvYOcc3tp6YO2lX603lnXsvNV14Lh4d+HExRhI7wQ9hlh8wC397hcGWsVn8+ytRXKKdDvzZI/3QsJb5wX9O4YJ03sHir+BMSXnAFkQqhDK8EY9OtlbMv5WDstOJQrx2dMtmtdadVMUcsTqNOzW3lynGxB8s/gr8vqXVoQChUXnNabjgkasuxA9Hih21oYfT0u92B3nFqfa6htWvJ59HM/sl+aljLMDZPgb42bl4WnrzGHhxqIRwdkaaIr5s77btf1FDvPpjcxN5WCUoDzjZO3xtw8rOWIA9f4ll249qFg5zWsrh8UHGKZMy9aqHaq4/e3MXPD6oXcQgLrP39Zpx10JA4P53CwCwUPqBmWvEz5g/w5iBPX+JO9761pN2r2qvP/mKUhAW//7K8DSNWRlpaFanmq3j1a6eiTrVjSfucFJx8QYbQ/1TnWTK9mGSAxb/AFHerE1rxxZ7Pxx4ZbG4WDry19suwbrx1zo+ntXnj3y+lA+uthYL3HmBG8LupAX5bLg5+TqT/LD4e4zZ+1Gv89Tu7WxHB+Y8ejmu69LUcLuM9DTUygludKLc1/D6XT0w+1Htmc/8JH7CPgxjHkfiT0RPE9FaIlpNRF8QUTPFujFEVEhEW4hokGJ5DyJaJ617heI1b07CjHf635/31V1n1it04zQo6+ubRWlfRnqarQE8fg9Tlz3/7Iw023Vn/MbKr2v5UgiXd7C4H5PSOL1znhdCdBFCdAXwGYBxAEBEHQCMBNARwGAAk4lI7r18DcAoAG2lf4Md2uApZiITl+bXs9W223HgiSO6WN7HaYf2qz/ujs8e6WdrX/nY9/drhaJJw9CgZpalIe/xMh1evHS0xosdTGLgKNtHCKEsC1kDVVp5I4CpQogSADuIqBBALyIqApArhFgGAEQ0BcBwALOd2BHP
JJo3ZlVOh5kIExkhn6OCJ63PUJYo+PKC68K8Akzq4PidmYgmENFuAHdC8vwBNAewW7HZHmlZc+mzerle26OIqICICg4f9qfEglnMFndz61bzez5UPzQiWdJQTcX8fbCDdZ2xgqH4E9F8Ilqv8e9GABBCjBVCtADwPoCH5d00mhIxlmsihHhTCNFTCNGzYUNrlTStUFpufU5bO/F1NaYq8Eob+a2TSmFu5HEtF/ZGIwkqfOO0RDeTWBiGfYQQA0229QGAWQCeQsijV6pjHoB90vI8jeXxi657arYj19nhnd6ORm8MRkLz2SP90MSjkrpWH2jhks5uG+IQM2LtaT1/F9qY/ejlqF/DeHwIkzw4zfZpq/jzBgCbpc8zAYwkomwiaoVQx+5yIcR+AKeJqI+U5XM3gBlObIh/EtObksWqU/PaaFDTY8/f09a9x1yqpw+DvBzse3HTXDTKTa66+UxsnJZ3mERE7QBUAtgJ4OcAIITYQETTAGwEUA7gISGEXCP2QQDvAKiGUEdv0nb2mqF7yzpYueuE4XbJEh9XYvc7xcvDgrNrmETGabbPzTHWTQAwQWN5AYBOTo7rNrE8N6eaa+TwvXd/bxwrLo3RgM3j2txxzJD2EEJgaGfnWTzJjlepptwF4g9tG9XEtkNngjYjMLiwm8cY3cfVszJQPcv9n8GuMDXKzcFLI7vZPm6daqG48aCOjQ23ZZFzByuZYHzKq5j5cD8UlwY9h0dwsPgjdvjBy3BLMma51K6eiRVPDjRZXM7eMeIlAhYvYZ8a2RkoLq1IyuvJS6plpaNaVrrxhkkKi7/HxLohvbxVgxSm+lY7iE2aGj6X8aL+NqiWqS02OTrLzfDhz/pi/saDqJnNtzNjnsQojJLAOJXgBjVCQpqRhDnYtmPmCXoqJo3ojFm/1C6FMbyr/TLZrRrUwAP9W9ven0lN2FVAcJk0Zt7SX72zO77YeCBc6z8ZiZfwideM7NVSd11GehoGd2yCORsOpMjZYIKGPX8YZPvoPBnMhldjbWfmodOwVjbu7H2BuYNpHcP2nt5j9aF772X5AICnru8IAOjYLLkmE/ejUB13CzAy7PnDoMPXYduxJtiIB2EOUgwGdWyC5+duMT0z2HVdmuG6LqFtb+mRZ7C1fyTjGAwm+Ulpzz9/9Cw8+N4KU9tmZdg7VWkxYvWVlT54ep4fwT4XNqqJoknD0KFZbtCmpBz9LmwQtAlMwKS0+APA7PUHDD3wmQ9fhm9+d6WlduUiWekxXOsKH8SfnVLvcfvtiUMzjB9w2AfGg2S65NWx3Gal1GZajMdrBccLGIYJiJT3/I2wPRBJ2i+W5+9H2IdhtIiXWdCY4GDxR+zQiLOCzrFrpHvq+RsYyC8d8Qf/JoyfsPjD25su6A5fJhHhoD/jPSz+HiOHff54Qyg3vYailgjH/JODRPoZU2VAHWMMiz/gaUqMHPZp27gmgMhaP5XWZ490Hc4sYZjUhMUf9kb4miXWIK/KRHIZGV3ceoDy1cD4CYu/x8Tq8K3Hc6YmBfwMZxIRFn8AB06dd6Wdu/tG1+BRa39OZtUp/9sd9idNMQsLk3ckcsiMrwuGxR/ADX9f4riNoknD8Kcbo2enVNfzv7BRTbx4+yVYM+5a63XvXeTBAW0AAM3rVAvMhkTHKwH18qGSyA8sxl14hC+A0nIfel4VQnFTt+CLkg3v1hzDuzUP2gyGYQKCPX8D9Lw7oynznh7eCa0DrMHPDp73sBfNJDIs/gbk1bUXFvlJnwuw8LEBVQt8FgoO6SYefsTh69cMJRm0b8KVVFMdFn8DBndqErQJTJzw0u1dcW2Hxp4fx0s/oX2TXPzvwb4YM7S9h0dhEgGO+RtgFN6JVxLT6vgmWfpJelxQL2gTmDiAxd8mlsXVw1f6p67vgBrZ8f1TNqiZhSNnSoM2g2EYifhWDMYU917WKmgTDFn0+JUoKasI2gxXcT9Gzz01jH+w+NukdcPgMnmsEC9122tmZ6BmnL+d
MEwqwR2+NmndsKat/biqYvKQoN1BDAOAxd93/PbE+WGTeCRqkgGTWLD4JznxEvZhjOF6O4yfsPgnKew8MgwTCxZ/hmGYFITF32c4Bp98uB2u4SuE8QMWf5/xPQbPcWSGYTRg8WcYh3D/CpOIsPj7jO9hHxYmz3Er7MMvaYyfsPgnO6wonsHPVSaRYfFnGJvwc5VJZFJO/M+XVWDT/lNBm8EwuvjZh9C3dX3/DsbEFa6IPxE9RkSCiBoolo0hokIi2kJEgxTLexDROmndK+TzWPYnPl6HIS9/gyNnShy39dLtXfGvey91wSr34ZRS73H7DAufh/iueepavHNffF6/jPc4LrNIRC0AXANgl2JZBwAjAXQE0AzAfCK6SAhRAeA1AKMAfAvgcwCDAcx2aodZCnYeBwAUl5Q7bisZJvZg7JPoYZ/a1TKDNoEJEDc8/xcB/A6R98KNAKYKIUqEEDsAFALoRURNAeQKIZaJkJszBcBwF2xgmMDgVE8mEXEk/kR0A4C9Qog1qlXNAexW/L1HWtZc+qxertf+KCIqIKKCw4cPOzGVYTzDrWhNg5rZAIDqWTzvAeM9hlcZEc0HoDWL+VgATwC4Vms3jWUixnJNhBBvAngTAHr27Jnob9lMkuG2wz/+ho7ocUFd9GnNc+wy3mMo/kKIgVrLiagzgFYA1kh9tnkAVhJRL4Q8+haKzfMA7JOW52ksZ5iEw21vpEZ2Bkb2aulyqwyjje2wjxBinRCikRAiXwiRj5CwdxdCHAAwE8BIIsomolYA2gJYLoTYD+A0EfWRsnzuBjDD+ddg9ODXJYZhtPAkuCiE2EBE0wBsBFAO4CEp0wcAHgTwDoBqCGX5+JbpEyQswskH9/MyiYxr4i95/8q/JwCYoLFdAYBObh2XYRiGsU7KjfANCr+9RE4/ZBgmFiz+PuF32Ifng2UYJhYpJ/7yZCpc/oBhmFQm5cRfxvcZtXyGwz7+kezXEpOcpJz4p4rHP6RTaFxey3rVA7aEYZh4hMeR+4Tfj5y7+lyAm3vkcakAH0gVh4JJLlLO8w8KvwMDRMTC7xMc9mESERZ/hmGYFCTlxJ+9NMZtOOzDJCIpJ/4yQd2wnIWTfLBDwSQiKSv+DMMwqUzKib/s8bO3xjBMKpNy4i8TVPkDLruQfHDMn0lEUlb8K1mFGZfgt0gmEUlZ8Q/qduUO3+SBf0smkUk58Ze9NHb8GafwNcQkMikn/lXwncswTOqSsuJfydrPOITDPkwik3LiH0719Fn82zSsCQAY3q25vwdmGIbRIGUrf5nJ9umSV9u14zWpnYOiScNca49hGMYJKef5y5jx/Gc+3M97QxiGYQIgdcWfO3wZhklhUk78OdWTcRu+lphEJOXEX4ZvWIZhUpnUFX8O+zAuwSmfTCKSsuLPef6MW/BbJJOIpJz4V+X58x3LMEzqknLiL8OeP+MWHPZhEpGUFX+u7cMwTCqTcuLPqZ6M2/C1xCQiKSf+Mhz2YRgmlUlZ8ecOX8YtOObPJCIpK/7s+TNuwX4Ek4iknPiHUz25w5dxDLv8TOKScuIfhrWfcQxfREziknLiH872CdgOhmGYIEk58ZcxM5kLw8SGwz5M4pKy4s/azzBMKpOy4s+eP8MwqYwj8Sei8US0l4hWS/+GKtaNIaJCItpCRIMUy3sQ0Tpp3StEwWRJs/QzDJPKuOH5vyiE6Cr9+xwAiKgDgJEAOgIYDGAyEaVL278GYBSAttK/wS7YYBo51ZPVn2GYVCbDo3ZvBDBVCFECYAcRFQLoRURFAHKFEMsAgIimABgOYLZHduD+d7/HzqNnw3/vOhb6PHb6Oq8OyaQIGWkhRyIrI2Wjp0wC44b4P0xEdwMoAPBbIcRxAM0BfKvYZo+0rEz6rF6uCRGNQugtAS1btrRlXMt6NSJuzsa5OVhceARdW9bBkU2H0KJuNWw/XIxqmeno3boeOjbLxRUXNcLOo8W2jvfPe3qirIJfK+KFKff1wslzZZ60fWX7
RnhwQBs8cHlrT9pnGC8hoxo3RDQfQBONVWMREvgjCAVRngbQVAhxHxG9CmCZEOI9qY1/AvgcwC4AE4UQA6XllwP4nRDieiNDe/bsKQoKCkx/MYZhGAYgohVCiJ7q5YaevyzUJg7wFoDPpD/3AGihWJ0HYJ+0PE9jOcMwDOMjTrN9mir+vAnAeunzTAAjiSibiFoh1LG7XAixH8BpIuojZfncDWCGExsYhmEY6ziN+T9HRF0RCvsUAfgZAAghNhDRNAAbAZQDeEgIUSHt8yCAdwBUQ6ij17POXoZhGEYbw5h/vMAxf4ZhGOvoxfw5R41hGCYFYfFnGIZJQVj8GYZhUhAWf4ZhmBQkYTp8iegwgJ02d2+A0GC0eCdR7AQSx1a2030SxVa2U2pXCBFVQy1hxN8JRFSg1dsdbySKnUDi2Mp2uk+i2Mp2xobDPgzDMCkIiz/DMEwKkiri/2bQBpgkUewEEsdWttN9EsVWtjMGKRHzZxiGYSJJFc+fYRiGUcDizzAMk4IktfgT0WBpAvlCIhodsC0tiOhLItpERBuI6FFp+Xgi2ktEq6V/QxX7jJFs30JEg3y2t4iI1kk2FUjL6hHRPCLaJv1fN0hbiaid4rytJqJTRPSreDmnRPQ2ER0iovWKZZbPIRH1kH6LQiJ6RSqH7rWdzxPRZiJaS0TTiaiOtDyfiM4pzu3rAdtp+bf22s4Ytn6osLOIiFZLy4M5p0KIpPwHIB3AdgCtAWQBWAOgQ4D2NAXQXfpcC8BWAB0AjAfwmMb2HSSbswG0kr5Luo/2FgFooFr2HIDR0ufRAJ6NB1sVv/cBABfEyzkF0B9AdwDrnZxDAMsB9AVACJVAH+KDndcCyJA+P6uwM1+5naqdIOy0/Ft7baeerar1fwEwLshzmsyefy8AhUKIH4QQpQCmIjSxfCAIIfYLIVZKn08D2IQY8xcjZOtUIUSJEGIHgEKEvlOQ3AjgXenzuwCGK5YHbevVALYLIWKNAvfVTiHE1wCOadhg+hxSaMKkXCHEMhFSgymKfTyzUwjxhRCiXPrzW0TOwBdFUHbGILDzaWSr5L3fBuA/sdrw2tZkFv/mAHYr/o45WbyfEFE+gG4AvpMWPSy9Xr+tCAMEbb8A8AURrSCiUdKyxiI0Gxuk/xtJy4O2FQBGIvJmisdzClg/h82lz+rlfnIfIiddakVEq4joKwrNww0Ea6eV3zoezuflAA4KIbYplvl+TpNZ/LViY4HntRJRTQD/A/ArIcQpAK8BaAOgK4D9CL0OAsHbf5kQojuAIQAeIqL+MbYN1FYiygJwA4D/Sovi9ZzGQs+2oM/tWIRm43tfWrQfQEshRDcAvwHwARHlIjg7rf7W8XAN3IFIRyWQc5rM4q83iXxgEFEmQsL/vhDiYwAQQhwUQlQIISoBvIWqMESg9gsh9kn/HwIwXbLroPQqKr+SHooHWxF6QK0UQhwE4vecSlg9h3sQGXLxzWYiugfAdQDulMIOkMIoR6XPKxCKpV8UlJ02fuvAzicAEFEGgBEAPpSXBXVOk1n8vwfQlohaSZ7hSIQmlg8EKc73TwCbhBB/VSxvqtjsJgBydsBMACOJKJuIWgFoi1Dnjx+21iCiWvJnhDr/1ks23SNtdg+AGUHbKhHhScXjOVVg6RxKoaHTRNRHuobuVuzjGUQ0GMDvAdwghDirWN6QiNKlz60lO38I0E5Lv3VQdioYCGCzECIczgnsnLrdyx1P/wAMRSirZjuAsQHb0g+hV7a1AFZL/4YC+DeAddLymQCaKvYZK9m+BR5kJMSwtTVCmRJrAGyQzx2A+gAWANgm/V8vDmytDuAogNqKZXFxThF6IO0HUIaQF/dTO+cQQE+ERG07gL9DGpnvsZ2FCMXM5Wv1dWnbm6VrYg2AlQCuD9hOy7+113bq2SotfwfAz1XbBnJOubwDwzBMCpLMYR+GYRhGBxZ/hmGYFITFn2EYJgVh8WcYhklBWPwZhmFSEBZ/hmGYFITFn2EY
JgX5f6quoV1jutnlAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    \"\"\"Run one episode of `agent` acting in `env`.\n",
    "\n",
    "    Args:\n",
    "        env: environment providing `reset()`, `step(action)` and `render()`.\n",
    "        agent: object providing `reset(mode=...)`, `step(observation, reward,\n",
    "            done)` and `close()`.\n",
    "        max_episode_steps: optional step cap; a falsy value disables it.\n",
    "        mode: forwarded unchanged to `agent.reset`.\n",
    "        render: when true, `env.render()` is called after every agent step.\n",
    "\n",
    "    Returns:\n",
    "        Tuple `(episode_reward, elapsed_steps)`: the summed rewards and the\n",
    "        number of environment steps taken.\n",
    "    \"\"\"\n",
    "    state = env.reset()\n",
    "    last_reward, finished = 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    total_reward = 0.\n",
    "    step_count = 0\n",
    "    hit_step_cap = False\n",
    "    while not hit_step_cap:\n",
    "        # The agent is also shown the terminal transition (finished=True)\n",
    "        # before the loop exits; the action it returns then is unused.\n",
    "        chosen_action = agent.step(state, last_reward, finished)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if finished:\n",
    "            break\n",
    "        state, last_reward, finished, _ = env.step(chosen_action)\n",
    "        total_reward += last_reward\n",
    "        step_count += 1\n",
    "        # Hitting the cap ends the loop without another agent.step call.\n",
    "        hit_step_cap = (bool(max_episode_steps)\n",
    "                and step_count >= max_episode_steps)\n",
    "    agent.close()\n",
    "    return total_reward, step_count\n",
    "\n",
    "\n",
    "# Training: keep playing episodes in 'train' mode until the mean reward of\n",
    "# the most recent (up to) 10 episodes exceeds -120.\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    # env.unwrapped is passed instead of env, and the step cap is handed to\n",
    "    # play_episode explicitly so the loop itself enforces it.\n",
    "    episode_reward, elapsed_steps = play_episode(\n",
    "        env.unwrapped, agent, mode='train',\n",
    "        max_episode_steps=env._max_episode_steps)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "                  episode, episode_reward, elapsed_steps)\n",
    "    if np.mean(episode_rewards[-10:]) > -120:\n",
    "        break\n",
    "# Learning curve: one point per training episode.\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "# Evaluation: 100 episodes with the default mode (None) on the original\n",
    "# `env`, with no explicit step cap passed to play_episode.\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug(\n",
    "        'test episode %d: reward = %.2f, steps = %d',\n",
    "        episode, episode_reward, elapsed_steps)\n",
    "# Summarise the evaluation as mean ± standard deviation of episode rewards.\n",
    "logging.info(\n",
    "    'average episode reward = %.2f ± %.2f',\n",
    "    np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Release the environment now that training and evaluation have finished.\n",
    "env.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
